Diffstat (limited to 'src/shader_recompiler')
233 files changed, 41653 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
new file mode 100644
index 000000000..b5b7e5e83
--- /dev/null
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -0,0 +1,268 @@
| 1 | add_library(shader_recompiler STATIC | ||
| 2 | backend/bindings.h | ||
| 3 | backend/glasm/emit_context.cpp | ||
| 4 | backend/glasm/emit_context.h | ||
| 5 | backend/glasm/emit_glasm.cpp | ||
| 6 | backend/glasm/emit_glasm.h | ||
| 7 | backend/glasm/emit_glasm_barriers.cpp | ||
| 8 | backend/glasm/emit_glasm_bitwise_conversion.cpp | ||
| 9 | backend/glasm/emit_glasm_composite.cpp | ||
| 10 | backend/glasm/emit_glasm_context_get_set.cpp | ||
| 11 | backend/glasm/emit_glasm_control_flow.cpp | ||
| 12 | backend/glasm/emit_glasm_convert.cpp | ||
| 13 | backend/glasm/emit_glasm_floating_point.cpp | ||
| 14 | backend/glasm/emit_glasm_image.cpp | ||
| 15 | backend/glasm/emit_glasm_instructions.h | ||
| 16 | backend/glasm/emit_glasm_integer.cpp | ||
| 17 | backend/glasm/emit_glasm_logical.cpp | ||
| 18 | backend/glasm/emit_glasm_memory.cpp | ||
| 19 | backend/glasm/emit_glasm_not_implemented.cpp | ||
| 20 | backend/glasm/emit_glasm_select.cpp | ||
| 21 | backend/glasm/emit_glasm_shared_memory.cpp | ||
| 22 | backend/glasm/emit_glasm_special.cpp | ||
| 23 | backend/glasm/emit_glasm_undefined.cpp | ||
| 24 | backend/glasm/emit_glasm_warp.cpp | ||
| 25 | backend/glasm/reg_alloc.cpp | ||
| 26 | backend/glasm/reg_alloc.h | ||
| 27 | backend/glsl/emit_context.cpp | ||
| 28 | backend/glsl/emit_context.h | ||
| 29 | backend/glsl/emit_glsl.cpp | ||
| 30 | backend/glsl/emit_glsl.h | ||
| 31 | backend/glsl/emit_glsl_atomic.cpp | ||
| 32 | backend/glsl/emit_glsl_barriers.cpp | ||
| 33 | backend/glsl/emit_glsl_bitwise_conversion.cpp | ||
| 34 | backend/glsl/emit_glsl_composite.cpp | ||
| 35 | backend/glsl/emit_glsl_context_get_set.cpp | ||
| 36 | backend/glsl/emit_glsl_control_flow.cpp | ||
| 37 | backend/glsl/emit_glsl_convert.cpp | ||
| 38 | backend/glsl/emit_glsl_floating_point.cpp | ||
| 39 | backend/glsl/emit_glsl_image.cpp | ||
| 40 | backend/glsl/emit_glsl_instructions.h | ||
| 41 | backend/glsl/emit_glsl_integer.cpp | ||
| 42 | backend/glsl/emit_glsl_logical.cpp | ||
| 43 | backend/glsl/emit_glsl_memory.cpp | ||
| 44 | backend/glsl/emit_glsl_not_implemented.cpp | ||
| 45 | backend/glsl/emit_glsl_select.cpp | ||
| 46 | backend/glsl/emit_glsl_shared_memory.cpp | ||
| 47 | backend/glsl/emit_glsl_special.cpp | ||
| 48 | backend/glsl/emit_glsl_undefined.cpp | ||
| 49 | backend/glsl/emit_glsl_warp.cpp | ||
| 50 | backend/glsl/var_alloc.cpp | ||
| 51 | backend/glsl/var_alloc.h | ||
| 52 | backend/spirv/emit_context.cpp | ||
| 53 | backend/spirv/emit_context.h | ||
| 54 | backend/spirv/emit_spirv.cpp | ||
| 55 | backend/spirv/emit_spirv.h | ||
| 56 | backend/spirv/emit_spirv_atomic.cpp | ||
| 57 | backend/spirv/emit_spirv_barriers.cpp | ||
| 58 | backend/spirv/emit_spirv_bitwise_conversion.cpp | ||
| 59 | backend/spirv/emit_spirv_composite.cpp | ||
| 60 | backend/spirv/emit_spirv_context_get_set.cpp | ||
| 61 | backend/spirv/emit_spirv_control_flow.cpp | ||
| 62 | backend/spirv/emit_spirv_convert.cpp | ||
| 63 | backend/spirv/emit_spirv_floating_point.cpp | ||
| 64 | backend/spirv/emit_spirv_image.cpp | ||
| 65 | backend/spirv/emit_spirv_image_atomic.cpp | ||
| 66 | backend/spirv/emit_spirv_instructions.h | ||
| 67 | backend/spirv/emit_spirv_integer.cpp | ||
| 68 | backend/spirv/emit_spirv_logical.cpp | ||
| 69 | backend/spirv/emit_spirv_memory.cpp | ||
| 70 | backend/spirv/emit_spirv_select.cpp | ||
| 71 | backend/spirv/emit_spirv_shared_memory.cpp | ||
| 72 | backend/spirv/emit_spirv_special.cpp | ||
| 73 | backend/spirv/emit_spirv_undefined.cpp | ||
| 74 | backend/spirv/emit_spirv_warp.cpp | ||
| 75 | environment.h | ||
| 76 | exception.h | ||
| 77 | frontend/ir/abstract_syntax_list.h | ||
| 78 | frontend/ir/attribute.cpp | ||
| 79 | frontend/ir/attribute.h | ||
| 80 | frontend/ir/basic_block.cpp | ||
| 81 | frontend/ir/basic_block.h | ||
| 82 | frontend/ir/breadth_first_search.h | ||
| 83 | frontend/ir/condition.cpp | ||
| 84 | frontend/ir/condition.h | ||
| 85 | frontend/ir/flow_test.cpp | ||
| 86 | frontend/ir/flow_test.h | ||
| 87 | frontend/ir/ir_emitter.cpp | ||
| 88 | frontend/ir/ir_emitter.h | ||
| 89 | frontend/ir/microinstruction.cpp | ||
| 90 | frontend/ir/modifiers.h | ||
| 91 | frontend/ir/opcodes.cpp | ||
| 92 | frontend/ir/opcodes.h | ||
| 93 | frontend/ir/opcodes.inc | ||
| 94 | frontend/ir/patch.cpp | ||
| 95 | frontend/ir/patch.h | ||
| 96 | frontend/ir/post_order.cpp | ||
| 97 | frontend/ir/post_order.h | ||
| 98 | frontend/ir/pred.h | ||
| 99 | frontend/ir/program.cpp | ||
| 100 | frontend/ir/program.h | ||
| 101 | frontend/ir/reg.h | ||
| 102 | frontend/ir/type.cpp | ||
| 103 | frontend/ir/type.h | ||
| 104 | frontend/ir/value.cpp | ||
| 105 | frontend/ir/value.h | ||
| 106 | frontend/maxwell/control_flow.cpp | ||
| 107 | frontend/maxwell/control_flow.h | ||
| 108 | frontend/maxwell/decode.cpp | ||
| 109 | frontend/maxwell/decode.h | ||
| 110 | frontend/maxwell/indirect_branch_table_track.cpp | ||
| 111 | frontend/maxwell/indirect_branch_table_track.h | ||
| 112 | frontend/maxwell/instruction.h | ||
| 113 | frontend/maxwell/location.h | ||
| 114 | frontend/maxwell/maxwell.inc | ||
| 115 | frontend/maxwell/opcodes.cpp | ||
| 116 | frontend/maxwell/opcodes.h | ||
| 117 | frontend/maxwell/structured_control_flow.cpp | ||
| 118 | frontend/maxwell/structured_control_flow.h | ||
| 119 | frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | ||
| 120 | frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | ||
| 121 | frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | ||
| 122 | frontend/maxwell/translate/impl/barrier_operations.cpp | ||
| 123 | frontend/maxwell/translate/impl/bitfield_extract.cpp | ||
| 124 | frontend/maxwell/translate/impl/bitfield_insert.cpp | ||
| 125 | frontend/maxwell/translate/impl/branch_indirect.cpp | ||
| 126 | frontend/maxwell/translate/impl/common_encoding.h | ||
| 127 | frontend/maxwell/translate/impl/common_funcs.cpp | ||
| 128 | frontend/maxwell/translate/impl/common_funcs.h | ||
| 129 | frontend/maxwell/translate/impl/condition_code_set.cpp | ||
| 130 | frontend/maxwell/translate/impl/double_add.cpp | ||
| 131 | frontend/maxwell/translate/impl/double_compare_and_set.cpp | ||
| 132 | frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | ||
| 133 | frontend/maxwell/translate/impl/double_min_max.cpp | ||
| 134 | frontend/maxwell/translate/impl/double_multiply.cpp | ||
| 135 | frontend/maxwell/translate/impl/double_set_predicate.cpp | ||
| 136 | frontend/maxwell/translate/impl/exit_program.cpp | ||
| 137 | frontend/maxwell/translate/impl/find_leading_one.cpp | ||
| 138 | frontend/maxwell/translate/impl/floating_point_add.cpp | ||
| 139 | frontend/maxwell/translate/impl/floating_point_compare.cpp | ||
| 140 | frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | ||
| 141 | frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | ||
| 142 | frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | ||
| 143 | frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | ||
| 144 | frontend/maxwell/translate/impl/floating_point_min_max.cpp | ||
| 145 | frontend/maxwell/translate/impl/floating_point_multi_function.cpp | ||
| 146 | frontend/maxwell/translate/impl/floating_point_multiply.cpp | ||
| 147 | frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | ||
| 148 | frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | ||
| 149 | frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | ||
| 150 | frontend/maxwell/translate/impl/half_floating_point_add.cpp | ||
| 151 | frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | ||
| 152 | frontend/maxwell/translate/impl/half_floating_point_helper.cpp | ||
| 153 | frontend/maxwell/translate/impl/half_floating_point_helper.h | ||
| 154 | frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | ||
| 155 | frontend/maxwell/translate/impl/half_floating_point_set.cpp | ||
| 156 | frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | ||
| 157 | frontend/maxwell/translate/impl/impl.cpp | ||
| 158 | frontend/maxwell/translate/impl/impl.h | ||
| 159 | frontend/maxwell/translate/impl/integer_add.cpp | ||
| 160 | frontend/maxwell/translate/impl/integer_add_three_input.cpp | ||
| 161 | frontend/maxwell/translate/impl/integer_compare.cpp | ||
| 162 | frontend/maxwell/translate/impl/integer_compare_and_set.cpp | ||
| 163 | frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | ||
| 164 | frontend/maxwell/translate/impl/integer_funnel_shift.cpp | ||
| 165 | frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | ||
| 166 | frontend/maxwell/translate/impl/integer_popcount.cpp | ||
| 167 | frontend/maxwell/translate/impl/integer_scaled_add.cpp | ||
| 168 | frontend/maxwell/translate/impl/integer_set_predicate.cpp | ||
| 169 | frontend/maxwell/translate/impl/integer_shift_left.cpp | ||
| 170 | frontend/maxwell/translate/impl/integer_shift_right.cpp | ||
| 171 | frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | ||
| 172 | frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | ||
| 173 | frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | ||
| 174 | frontend/maxwell/translate/impl/load_constant.cpp | ||
| 175 | frontend/maxwell/translate/impl/load_constant.h | ||
| 176 | frontend/maxwell/translate/impl/load_effective_address.cpp | ||
| 177 | frontend/maxwell/translate/impl/load_store_attribute.cpp | ||
| 178 | frontend/maxwell/translate/impl/load_store_local_shared.cpp | ||
| 179 | frontend/maxwell/translate/impl/load_store_memory.cpp | ||
| 180 | frontend/maxwell/translate/impl/logic_operation.cpp | ||
| 181 | frontend/maxwell/translate/impl/logic_operation_three_input.cpp | ||
| 182 | frontend/maxwell/translate/impl/move_predicate_to_register.cpp | ||
| 183 | frontend/maxwell/translate/impl/move_register.cpp | ||
| 184 | frontend/maxwell/translate/impl/move_register_to_predicate.cpp | ||
| 185 | frontend/maxwell/translate/impl/move_special_register.cpp | ||
| 186 | frontend/maxwell/translate/impl/not_implemented.cpp | ||
| 187 | frontend/maxwell/translate/impl/output_geometry.cpp | ||
| 188 | frontend/maxwell/translate/impl/pixel_load.cpp | ||
| 189 | frontend/maxwell/translate/impl/predicate_set_predicate.cpp | ||
| 190 | frontend/maxwell/translate/impl/predicate_set_register.cpp | ||
| 191 | frontend/maxwell/translate/impl/select_source_with_predicate.cpp | ||
| 192 | frontend/maxwell/translate/impl/surface_atomic_operations.cpp | ||
| 193 | frontend/maxwell/translate/impl/surface_load_store.cpp | ||
| 194 | frontend/maxwell/translate/impl/texture_fetch.cpp | ||
| 195 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | ||
| 196 | frontend/maxwell/translate/impl/texture_gather.cpp | ||
| 197 | frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | ||
| 198 | frontend/maxwell/translate/impl/texture_gradient.cpp | ||
| 199 | frontend/maxwell/translate/impl/texture_load.cpp | ||
| 200 | frontend/maxwell/translate/impl/texture_load_swizzled.cpp | ||
| 201 | frontend/maxwell/translate/impl/texture_mipmap_level.cpp | ||
| 202 | frontend/maxwell/translate/impl/texture_query.cpp | ||
| 203 | frontend/maxwell/translate/impl/video_helper.cpp | ||
| 204 | frontend/maxwell/translate/impl/video_helper.h | ||
| 205 | frontend/maxwell/translate/impl/video_minimum_maximum.cpp | ||
| 206 | frontend/maxwell/translate/impl/video_multiply_add.cpp | ||
| 207 | frontend/maxwell/translate/impl/video_set_predicate.cpp | ||
| 208 | frontend/maxwell/translate/impl/vote.cpp | ||
| 209 | frontend/maxwell/translate/impl/warp_shuffle.cpp | ||
| 210 | frontend/maxwell/translate/translate.cpp | ||
| 211 | frontend/maxwell/translate/translate.h | ||
| 212 | frontend/maxwell/translate_program.cpp | ||
| 213 | frontend/maxwell/translate_program.h | ||
| 214 | host_translate_info.h | ||
| 215 | ir_opt/collect_shader_info_pass.cpp | ||
| 216 | ir_opt/constant_propagation_pass.cpp | ||
| 217 | ir_opt/dead_code_elimination_pass.cpp | ||
| 218 | ir_opt/dual_vertex_pass.cpp | ||
| 219 | ir_opt/global_memory_to_storage_buffer_pass.cpp | ||
| 220 | ir_opt/identity_removal_pass.cpp | ||
| 221 | ir_opt/lower_fp16_to_fp32.cpp | ||
| 222 | ir_opt/lower_int64_to_int32.cpp | ||
| 223 | ir_opt/passes.h | ||
| 224 | ir_opt/ssa_rewrite_pass.cpp | ||
| 225 | ir_opt/texture_pass.cpp | ||
| 226 | ir_opt/verification_pass.cpp | ||
| 227 | object_pool.h | ||
| 228 | profile.h | ||
| 229 | program_header.h | ||
| 230 | runtime_info.h | ||
| 231 | shader_info.h | ||
| 232 | varying_state.h | ||
| 233 | ) | ||
| 234 | |||
| 235 | target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) | ||
| 236 | |||
| 237 | if (MSVC) | ||
| 238 | target_compile_options(shader_recompiler PRIVATE | ||
| 239 | /W4 | ||
| 240 | /WX | ||
| 241 | /we4018 # 'expression' : signed/unsigned mismatch | ||
| 242 | /we4244 # 'argument' : conversion from 'type1' to 'type2', possible loss of data (floating-point) | ||
| 243 | /we4245 # 'conversion' : conversion from 'type1' to 'type2', signed/unsigned mismatch | ||
| 244 | /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data | ||
| 245 | /we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data | ||
| 246 | /we4305 # 'context' : truncation from 'type1' to 'type2' | ||
| 247 | /we4800 # Implicit conversion from 'type' to bool. Possible information loss | ||
| 248 | /we4826 # Conversion from 'type1' to 'type2' is sign-extended. This may cause unexpected runtime behavior. | ||
| 249 | ) | ||
| 250 | else() | ||
| 251 | target_compile_options(shader_recompiler PRIVATE | ||
| 252 | -Werror | ||
| 253 | -Werror=conversion | ||
| 254 | -Werror=ignored-qualifiers | ||
| 255 | -Werror=implicit-fallthrough | ||
| 256 | -Werror=shadow | ||
| 257 | -Werror=sign-compare | ||
| 258 | $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter> | ||
| 259 | $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable> | ||
| 260 | -Werror=unused-variable | ||
| 261 | |||
| 262 | # Bracket depth determines maximum size of a fold expression in Clang since 9c9974c3ccb6. | ||
| 263 | # And this in turn limits the size of a std::array. | ||
| 264 | $<$<CXX_COMPILER_ID:Clang>:-fbracket-depth=1024> | ||
| 265 | ) | ||
| 266 | endif() | ||
| 267 | |||
| 268 | create_target_directory_groups(shader_recompiler) | ||
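The bracket-depth comment above is easiest to see with a concrete case. A minimal sketch (not from the repository) of the pattern that hits Clang's limit: a comma fold expands into one nested parenthesis level per pack element, so folding over a large pack, such as a table with one entry per opcode, exceeds the default depth of 256 without `-fbracket-depth=1024`.

```cpp
#include <array>
#include <cstddef>
#include <utility>

// Each element of the fold adds one level of nesting in Clang's expansion,
// so N == 1024 needs a bracket depth well above the default of 256.
template <std::size_t N, std::size_t... I>
constexpr std::array<std::size_t, N> MakeTable(std::index_sequence<I...>) {
    std::array<std::size_t, N> table{};
    ((table[I] = I * I), ...); // comma fold over the whole pack
    return table;
}

constexpr auto kTable = MakeTable<1024>(std::make_index_sequence<1024>{});
```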
diff --git a/src/shader_recompiler/backend/bindings.h b/src/shader_recompiler/backend/bindings.h
new file mode 100644
index 000000000..35503000c
--- /dev/null
+++ b/src/shader_recompiler/backend/bindings.h
@@ -0,0 +1,19 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend { | ||
| 10 | |||
| 11 | struct Bindings { | ||
| 12 | u32 unified{}; | ||
| 13 | u32 uniform_buffer{}; | ||
| 14 | u32 storage_buffer{}; | ||
| 15 | u32 texture{}; | ||
| 16 | u32 image{}; | ||
| 17 | }; | ||
| 18 | |||
| 19 | } // namespace Shader::Backend | ||
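`Bindings` is a bundle of running counters that each backend advances as it assigns resource slots. A hypothetical usage sketch (the helper name is invented; the counter-advance pattern mirrors the descriptor loops in `emit_context.cpp` below):

```cpp
#include <cstdint>
#include <vector>

using u32 = std::uint32_t; // as defined in common/common_types.h

struct Bindings {
    u32 unified{};
    u32 uniform_buffer{};
    u32 storage_buffer{};
    u32 texture{};
    u32 image{};
};

// Reserve one contiguous slot range per texture descriptor; 'counts' holds
// each descriptor's array size. The next stage keeps counting from where
// this one stopped, so stages never collide on binding indices.
std::vector<u32> ReserveTextures(Bindings& bindings, const std::vector<u32>& counts) {
    std::vector<u32> base;
    base.reserve(counts.size());
    for (const u32 count : counts) {
        base.push_back(bindings.texture); // first slot used by this descriptor
        bindings.texture += count;        // skip past the whole array
    }
    return base;
}
```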
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/bindings.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | std::string_view InterpDecorator(Interpolation interp) { | ||
| 16 | switch (interp) { | ||
| 17 | case Interpolation::Smooth: | ||
| 18 | return ""; | ||
| 19 | case Interpolation::Flat: | ||
| 20 | return "FLAT "; | ||
| 21 | case Interpolation::NoPerspective: | ||
| 22 | return "NOPERSPECTIVE "; | ||
| 23 | } | ||
| 24 | throw InvalidArgument("Invalid interpolation {}", interp); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool IsInputArray(Stage stage) { | ||
| 28 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 29 | stage == Stage::TessellationEval; | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 34 | const RuntimeInfo& runtime_info_) | ||
| 35 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { | ||
| 36 | // FIXME: Temporary partial implementation | ||
| 37 | u32 cbuf_index{}; | ||
| 38 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 39 | if (desc.count != 1) { | ||
| 40 | throw NotImplementedException("Constant buffer descriptor array"); | ||
| 41 | } | ||
| 42 | Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); | ||
| 43 | ++cbuf_index; | ||
| 44 | } | ||
| 45 | u32 ssbo_index{}; | ||
| 46 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 47 | if (desc.count != 1) { | ||
| 48 | throw NotImplementedException("Storage buffer descriptor array"); | ||
| 49 | } | ||
| 50 | if (runtime_info.glasm_use_storage_buffers) { | ||
| 51 | Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); | ||
| 52 | ++bindings.storage_buffer; | ||
| 53 | ++ssbo_index; | ||
| 54 | } | ||
| 55 | } | ||
| 56 | if (!runtime_info.glasm_use_storage_buffers) { | ||
| 57 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { | ||
| 58 | Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | stage = program.stage; | ||
| 62 | switch (program.stage) { | ||
| 63 | case Stage::VertexA: | ||
| 64 | case Stage::VertexB: | ||
| 65 | stage_name = "vertex"; | ||
| 66 | attrib_name = "vertex"; | ||
| 67 | break; | ||
| 68 | case Stage::TessellationControl: | ||
| 69 | case Stage::TessellationEval: | ||
| 70 | stage_name = "primitive"; | ||
| 71 | attrib_name = "primitive"; | ||
| 72 | break; | ||
| 73 | case Stage::Geometry: | ||
| 74 | stage_name = "primitive"; | ||
| 75 | attrib_name = "vertex"; | ||
| 76 | break; | ||
| 77 | case Stage::Fragment: | ||
| 78 | stage_name = "fragment"; | ||
| 79 | attrib_name = "fragment"; | ||
| 80 | break; | ||
| 81 | case Stage::Compute: | ||
| 82 | stage_name = "invocation"; | ||
| 83 | break; | ||
| 84 | } | ||
| 85 | const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; | ||
| 86 | const VaryingState loads{info.loads.mask | info.passthrough.mask}; | ||
| 87 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 88 | if (loads.Generic(index)) { | ||
| 89 | Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};", | ||
| 90 | InterpDecorator(info.interpolation[index]), index, attr_stage, index, index); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 94 | Add("ATTRIB vertex_position=vertex.position;"); | ||
| 95 | } | ||
| 96 | if (info.uses_invocation_id) { | ||
| 97 | Add("ATTRIB primitive_invocation=primitive.invocation;"); | ||
| 98 | } | ||
| 99 | if (info.stores_tess_level_outer) { | ||
| 100 | Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};"); | ||
| 101 | } | ||
| 102 | if (info.stores_tess_level_inner) { | ||
| 103 | Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};"); | ||
| 104 | } | ||
| 105 | if (info.stores.ClipDistances()) { | ||
| 106 | Add("OUTPUT result_clip[]={{result.clip[0..7]}};"); | ||
| 107 | } | ||
| 108 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 109 | if (!info.uses_patches[index]) { | ||
| 110 | continue; | ||
| 111 | } | ||
| 112 | if (stage == Stage::TessellationControl) { | ||
| 113 | Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};" | ||
| 114 | "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};", | ||
| 115 | index, index, index, index, index, index); | ||
| 116 | } else { | ||
| 117 | Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index, | ||
| 118 | index, index); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | if (stage == Stage::Fragment) { | ||
| 122 | Add("OUTPUT frag_color0=result.color;"); | ||
| 123 | for (size_t index = 1; index < info.stores_frag_color.size(); ++index) { | ||
| 124 | Add("OUTPUT frag_color{}=result.color[{}];", index, index); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 128 | if (info.stores.Generic(index)) { | ||
| 129 | Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | image_buffer_bindings.reserve(info.image_buffer_descriptors.size()); | ||
| 133 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 134 | image_buffer_bindings.push_back(bindings.image); | ||
| 135 | bindings.image += desc.count; | ||
| 136 | } | ||
| 137 | image_bindings.reserve(info.image_descriptors.size()); | ||
| 138 | for (const auto& desc : info.image_descriptors) { | ||
| 139 | image_bindings.push_back(bindings.image); | ||
| 140 | bindings.image += desc.count; | ||
| 141 | } | ||
| 142 | texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size()); | ||
| 143 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 144 | texture_buffer_bindings.push_back(bindings.texture); | ||
| 145 | bindings.texture += desc.count; | ||
| 146 | } | ||
| 147 | texture_bindings.reserve(info.texture_descriptors.size()); | ||
| 148 | for (const auto& desc : info.texture_descriptors) { | ||
| 149 | texture_bindings.push_back(bindings.texture); | ||
| 150 | bindings.texture += desc.count; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | } // namespace Shader::Backend::GLASM | ||
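The generic-attribute loop's output is clearest with concrete values plugged in. A small sketch (the format string is copied from the constructor above; the index, interpolation, and stage are assumed for illustration):

```cpp
#include <string>

#include <fmt/format.h>

// With a flat-interpolated input at generic index 3 in a fragment shader,
// the loads loop emits the declaration below: InterpDecorator contributes
// the "FLAT " prefix and attr_stage selects the "fragment" namespace.
std::string ExampleAttribDecl() {
    const int index = 3;
    return fmt::format("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
                       "FLAT ", index, "fragment", index, index);
    // -> FLAT ATTRIB in_attr3[]={fragment.attrib[3..3]};
}
```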
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader { | ||
| 17 | struct Info; | ||
| 18 | struct Profile; | ||
| 19 | struct RuntimeInfo; | ||
| 20 | } // namespace Shader | ||
| 21 | |||
| 22 | namespace Shader::Backend { | ||
| 23 | struct Bindings; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace Shader::IR { | ||
| 27 | class Inst; | ||
| 28 | struct Program; | ||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | namespace Shader::Backend::GLASM { | ||
| 32 | |||
| 33 | class EmitContext { | ||
| 34 | public: | ||
| 35 | explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 36 | const RuntimeInfo& runtime_info_); | ||
| 37 | |||
| 38 | template <typename... Args> | ||
| 39 | void Add(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 40 | code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst), | ||
| 41 | std::forward<Args>(args)...); | ||
| 42 | // TODO: Remove this | ||
| 43 | code += '\n'; | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename... Args> | ||
| 47 | void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 48 | code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst), | ||
| 49 | std::forward<Args>(args)...); | ||
| 50 | // TODO: Remove this | ||
| 51 | code += '\n'; | ||
| 52 | } | ||
| 53 | |||
| 54 | template <typename... Args> | ||
| 55 | void Add(const char* format_str, Args&&... args) { | ||
| 56 | code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); | ||
| 57 | // TODO: Remove this | ||
| 58 | code += '\n'; | ||
| 59 | } | ||
| 60 | |||
| 61 | std::string code; | ||
| 62 | RegAlloc reg_alloc{}; | ||
| 63 | const Info& info; | ||
| 64 | const Profile& profile; | ||
| 65 | const RuntimeInfo& runtime_info; | ||
| 66 | |||
| 67 | std::vector<u32> texture_buffer_bindings; | ||
| 68 | std::vector<u32> image_buffer_bindings; | ||
| 69 | std::vector<u32> texture_bindings; | ||
| 70 | std::vector<u32> image_bindings; | ||
| 71 | |||
| 72 | Stage stage{}; | ||
| 73 | std::string_view stage_name = "invalid"; | ||
| 74 | std::string_view attrib_name = "invalid"; | ||
| 75 | |||
| 76 | u32 num_safety_loop_vars{}; | ||
| 77 | bool uses_y_direction{}; | ||
| 78 | }; | ||
| 79 | |||
| 80 | } // namespace Shader::Backend::GLASM | ||
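The three `Add` overloads differ only in which register class they define for the result: the `IR::Inst&` overloads pass the freshly defined destination register as the first format argument, so the `{}` slots line up as destination first, sources after. A usage sketch (the `ADD.S` emitter is illustrative; `EmitPackUint2x32` appears verbatim later in this diff):

```cpp
// Destination register fills {0} implicitly; a and b fill {1} and {2}.
void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
    ctx.Add("ADD.S {}.x,{},{};", inst, a, b);
}

// 64-bit results allocate a long register through LongDefine via LongAdd.
void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.LongAdd("PK64.U {}.x,{};", inst, value);
}
```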
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | |||
| 9 | #include "common/div_ceil.h" | ||
| 10 | #include "common/settings.h" | ||
| 11 | #include "shader_recompiler/backend/bindings.h" | ||
| 12 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 13 | #include "shader_recompiler/backend/glasm/emit_glasm.h" | ||
| 14 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 17 | #include "shader_recompiler/profile.h" | ||
| 18 | #include "shader_recompiler/runtime_info.h" | ||
| 19 | |||
| 20 | namespace Shader::Backend::GLASM { | ||
| 21 | namespace { | ||
| 22 | template <class Func> | ||
| 23 | struct FuncTraits {}; | ||
| 24 | |||
| 25 | template <class ReturnType_, class... Args> | ||
| 26 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 27 | using ReturnType = ReturnType_; | ||
| 28 | |||
| 29 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 30 | |||
| 31 | template <size_t I> | ||
| 32 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 33 | }; | ||
| 34 | |||
| 35 | template <typename T> | ||
| 36 | struct Identity { | ||
| 37 | Identity(T data_) : data{data_} {} | ||
| 38 | |||
| 39 | T Extract() { | ||
| 40 | return data; | ||
| 41 | } | ||
| 42 | |||
| 43 | T data; | ||
| 44 | }; | ||
| 45 | |||
| 46 | template <bool scalar> | ||
| 47 | class RegWrapper { | ||
| 48 | public: | ||
| 49 | RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} { | ||
| 50 | const Value value{reg_alloc.Peek(ir_value)}; | ||
| 51 | if (value.type == Type::Register) { | ||
| 52 | inst = ir_value.InstRecursive(); | ||
| 53 | reg = Register{value}; | ||
| 54 | } else { | ||
| 55 | reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg(); | ||
| 56 | } | ||
| 57 | switch (value.type) { | ||
| 58 | case Type::Register: | ||
| 59 | case Type::Void: | ||
| 60 | break; | ||
| 61 | case Type::U32: | ||
| 62 | ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32); | ||
| 63 | break; | ||
| 64 | case Type::U64: | ||
| 65 | ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64); | ||
| 66 | break; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | auto Extract() { | ||
| 71 | if (inst) { | ||
| 72 | reg_alloc.Unref(*inst); | ||
| 73 | } else { | ||
| 74 | reg_alloc.FreeReg(reg); | ||
| 75 | } | ||
| 76 | return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}}; | ||
| 77 | } | ||
| 78 | |||
| 79 | private: | ||
| 80 | RegAlloc& reg_alloc; | ||
| 81 | IR::Inst* inst{}; | ||
| 82 | Register reg{}; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename ArgType> | ||
| 86 | class ValueWrapper { | ||
| 87 | public: | ||
| 88 | ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_) | ||
| 89 | : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {} | ||
| 90 | |||
| 91 | ArgType Extract() { | ||
| 92 | if (!ir_value.IsImmediate()) { | ||
| 93 | reg_alloc.Unref(*ir_value.InstRecursive()); | ||
| 94 | } | ||
| 95 | return value; | ||
| 96 | } | ||
| 97 | |||
| 98 | private: | ||
| 99 | RegAlloc& reg_alloc; | ||
| 100 | const IR::Value& ir_value; | ||
| 101 | ArgType value; | ||
| 102 | }; | ||
| 103 | |||
| 104 | template <typename ArgType> | ||
| 105 | auto Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 106 | if constexpr (std::is_same_v<ArgType, Register>) { | ||
| 107 | return RegWrapper<false>{ctx, arg}; | ||
| 108 | } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) { | ||
| 109 | return RegWrapper<true>{ctx, arg}; | ||
| 110 | } else if constexpr (std::is_base_of_v<Value, ArgType>) { | ||
| 111 | return ValueWrapper<ArgType>{ctx, arg}; | ||
| 112 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 113 | return Identity<const IR::Value&>{arg}; | ||
| 114 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 115 | return Identity{arg.U32()}; | ||
| 116 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 117 | return Identity{arg.Attribute()}; | ||
| 118 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 119 | return Identity{arg.Patch()}; | ||
| 120 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 121 | return Identity{arg.Reg()}; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | |||
| 125 | template <auto func, bool is_first_arg_inst> | ||
| 126 | struct InvokeCall { | ||
| 127 | template <typename... Args> | ||
| 128 | InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) { | ||
| 129 | if constexpr (is_first_arg_inst) { | ||
| 130 | func(ctx, *inst, args.Extract()...); | ||
| 131 | } else { | ||
| 132 | func(ctx, args.Extract()...); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | }; | ||
| 136 | |||
| 137 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 138 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 139 | using Traits = FuncTraits<decltype(func)>; | ||
| 140 | if constexpr (is_first_arg_inst) { | ||
| 141 | InvokeCall<func, is_first_arg_inst>{ | ||
| 142 | ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...}; | ||
| 143 | } else { | ||
| 144 | InvokeCall<func, is_first_arg_inst>{ | ||
| 145 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...}; | ||
| 146 | } | ||
| 147 | } | ||
| 148 | |||
| 149 | template <auto func> | ||
| 150 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 151 | using Traits = FuncTraits<decltype(func)>; | ||
| 152 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 153 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 154 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 155 | } else { | ||
| 156 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 157 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; | ||
| 158 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 159 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 164 | switch (inst->GetOpcode()) { | ||
| 165 | #define OPCODE(name, result_type, ...) \ | ||
| 166 | case IR::Opcode::name: \ | ||
| 167 | return Invoke<&Emit##name>(ctx, inst); | ||
| 168 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 169 | #undef OPCODE | ||
| 170 | } | ||
| 171 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 172 | } | ||
| 173 | |||
| 174 | bool IsReference(IR::Inst& inst) { | ||
| 175 | return inst.GetOpcode() == IR::Opcode::Reference; | ||
| 176 | } | ||
| 177 | |||
| 178 | void PrecolorInst(IR::Inst& phi) { | ||
| 179 | // Insert phi moves before references to avoid overwriting other phis | ||
| 180 | const size_t num_args{phi.NumArgs()}; | ||
| 181 | for (size_t i = 0; i < num_args; ++i) { | ||
| 182 | IR::Block& phi_block{*phi.PhiBlock(i)}; | ||
| 183 | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; | ||
| 184 | IR::IREmitter ir{phi_block, it}; | ||
| 185 | const IR::Value arg{phi.Arg(i)}; | ||
| 186 | if (arg.IsImmediate()) { | ||
| 187 | ir.PhiMove(phi, arg); | ||
| 188 | } else { | ||
| 189 | ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); | ||
| 190 | } | ||
| 191 | } | ||
| 192 | for (size_t i = 0; i < num_args; ++i) { | ||
| 193 | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | void Precolor(const IR::Program& program) { | ||
| 198 | for (IR::Block* const block : program.blocks) { | ||
| 199 | for (IR::Inst& phi : block->Instructions()) { | ||
| 200 | if (!IR::IsPhi(phi)) { | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | PrecolorInst(phi); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||
| 209 | const auto eval{ | ||
| 210 | [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }}; | ||
| 211 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 212 | switch (node.type) { | ||
| 213 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 214 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 215 | EmitInst(ctx, &inst); | ||
| 216 | } | ||
| 217 | break; | ||
| 218 | case IR::AbstractSyntaxNode::Type::If: | ||
| 219 | ctx.Add("MOV.S.CC RC,{};" | ||
| 220 | "IF NE.x;", | ||
| 221 | eval(node.data.if_node.cond)); | ||
| 222 | break; | ||
| 223 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 224 | ctx.Add("ENDIF;"); | ||
| 225 | break; | ||
| 226 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 227 | ctx.Add("REP;"); | ||
| 228 | break; | ||
| 229 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
| 230 | if (!Settings::values.disable_shader_loop_safety_checks) { | ||
| 231 | const u32 loop_index{ctx.num_safety_loop_vars++}; | ||
| 232 | const u32 vector_index{loop_index / 4}; | ||
| 233 | const char component{"xyzw"[loop_index % 4]}; | ||
| 234 | ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;" | ||
| 235 | "BRK(LT.{});", | ||
| 236 | vector_index, component, vector_index, component, component); | ||
| 237 | } | ||
| 238 | if (node.data.repeat.cond.IsImmediate()) { | ||
| 239 | if (node.data.repeat.cond.U1()) { | ||
| 240 | ctx.Add("ENDREP;"); | ||
| 241 | } else { | ||
| 242 | ctx.Add("BRK;" | ||
| 243 | "ENDREP;"); | ||
| 244 | } | ||
| 245 | } else { | ||
| 246 | ctx.Add("MOV.S.CC RC,{};" | ||
| 247 | "BRK(EQ.x);" | ||
| 248 | "ENDREP;", | ||
| 249 | eval(node.data.repeat.cond)); | ||
| 250 | } | ||
| 251 | break; | ||
| 252 | case IR::AbstractSyntaxNode::Type::Break: | ||
| 253 | if (node.data.break_node.cond.IsImmediate()) { | ||
| 254 | if (node.data.break_node.cond.U1()) { | ||
| 255 | ctx.Add("BRK;"); | ||
| 256 | } | ||
| 257 | } else { | ||
| 258 | ctx.Add("MOV.S.CC RC,{};" | ||
| 259 | "BRK (NE.x);", | ||
| 260 | eval(node.data.break_node.cond)); | ||
| 261 | } | ||
| 262 | break; | ||
| 263 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 264 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 265 | ctx.Add("RET;"); | ||
| 266 | break; | ||
| 267 | } | ||
| 268 | } | ||
| 269 | if (!ctx.reg_alloc.IsEmpty()) { | ||
| 270 | LOG_WARNING(Shader_GLASM, "Register leak after generating code"); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | void SetupOptions(const IR::Program& program, const Profile& profile, | ||
| 275 | const RuntimeInfo& runtime_info, std::string& header) { | ||
| 276 | const Info& info{program.info}; | ||
| 277 | const Stage stage{program.stage}; | ||
| 278 | |||
| 279 | // TODO: Track the shared atomic ops | ||
| 280 | header += "OPTION NV_internal;" | ||
| 281 | "OPTION NV_shader_storage_buffer;" | ||
| 282 | "OPTION NV_gpu_program_fp64;"; | ||
| 283 | if (info.uses_int64_bit_atomics) { | ||
| 284 | header += "OPTION NV_shader_atomic_int64;"; | ||
| 285 | } | ||
| 286 | if (info.uses_atomic_f32_add) { | ||
| 287 | header += "OPTION NV_shader_atomic_float;"; | ||
| 288 | } | ||
| 289 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 290 | header += "OPTION NV_shader_atomic_fp16_vector;"; | ||
| 291 | } | ||
| 292 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 293 | info.uses_fswzadd) { | ||
| 294 | header += "OPTION NV_shader_thread_group;"; | ||
| 295 | } | ||
| 296 | if (info.uses_subgroup_shuffles) { | ||
| 297 | header += "OPTION NV_shader_thread_shuffle;"; | ||
| 298 | } | ||
| 299 | if (info.uses_sparse_residency) { | ||
| 300 | header += "OPTION EXT_sparse_texture2;"; | ||
| 301 | } | ||
| 302 | const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] || | ||
| 303 | info.stores[IR::Attribute::Layer]}; | ||
| 304 | if ((stage != Stage::Geometry && stores_viewport_layer) || | ||
| 305 | info.stores[IR::Attribute::ViewportMask]) { | ||
| 306 | if (profile.support_viewport_index_layer_non_geometry) { | ||
| 307 | header += "OPTION NV_viewport_array2;"; | ||
| 308 | } | ||
| 309 | } | ||
| 310 | if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) { | ||
| 311 | header += "OPTION NV_geometry_shader_passthrough;"; | ||
| 312 | } | ||
| 313 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||
| 314 | header += "OPTION EXT_shader_image_load_formatted;"; | ||
| 315 | } | ||
| 316 | if (profile.support_derivative_control) { | ||
| 317 | header += "OPTION ARB_derivative_control;"; | ||
| 318 | } | ||
| 319 | if (stage == Stage::Fragment && runtime_info.force_early_z != 0) { | ||
| 320 | header += "OPTION NV_early_fragment_tests;"; | ||
| 321 | } | ||
| 322 | if (stage == Stage::Fragment) { | ||
| 323 | header += "OPTION ARB_draw_buffers;"; | ||
| 324 | } | ||
| 325 | } | ||
| 326 | |||
| 327 | std::string_view StageHeader(Stage stage) { | ||
| 328 | switch (stage) { | ||
| 329 | case Stage::VertexA: | ||
| 330 | case Stage::VertexB: | ||
| 331 | return "!!NVvp5.0\n"; | ||
| 332 | case Stage::TessellationControl: | ||
| 333 | return "!!NVtcp5.0\n"; | ||
| 334 | case Stage::TessellationEval: | ||
| 335 | return "!!NVtep5.0\n"; | ||
| 336 | case Stage::Geometry: | ||
| 337 | return "!!NVgp5.0\n"; | ||
| 338 | case Stage::Fragment: | ||
| 339 | return "!!NVfp5.0\n"; | ||
| 340 | case Stage::Compute: | ||
| 341 | return "!!NVcp5.0\n"; | ||
| 342 | } | ||
| 343 | throw InvalidArgument("Invalid stage {}", stage); | ||
| 344 | } | ||
| 345 | |||
| 346 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 347 | switch (topology) { | ||
| 348 | case InputTopology::Points: | ||
| 349 | return "POINTS"; | ||
| 350 | case InputTopology::Lines: | ||
| 351 | return "LINES"; | ||
| 352 | case InputTopology::LinesAdjacency: | ||
| 353 | return "LINESS_ADJACENCY"; | ||
| 354 | case InputTopology::Triangles: | ||
| 355 | return "TRIANGLES"; | ||
| 356 | case InputTopology::TrianglesAdjacency: | ||
| 357 | return "TRIANGLES_ADJACENCY"; | ||
| 358 | } | ||
| 359 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 360 | } | ||
| 361 | |||
| 362 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 363 | switch (topology) { | ||
| 364 | case OutputTopology::PointList: | ||
| 365 | return "POINTS"; | ||
| 366 | case OutputTopology::LineStrip: | ||
| 367 | return "LINE_STRIP"; | ||
| 368 | case OutputTopology::TriangleStrip: | ||
| 369 | return "TRIANGLE_STRIP"; | ||
| 370 | } | ||
| 371 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 372 | } | ||
| 373 | |||
| 374 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 375 | switch (primitive) { | ||
| 376 | case TessPrimitive::Triangles: | ||
| 377 | return "TRIANGLES"; | ||
| 378 | case TessPrimitive::Quads: | ||
| 379 | return "QUADS"; | ||
| 380 | case TessPrimitive::Isolines: | ||
| 381 | return "ISOLINES"; | ||
| 382 | } | ||
| 383 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 384 | } | ||
| 385 | |||
| 386 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 387 | switch (spacing) { | ||
| 388 | case TessSpacing::Equal: | ||
| 389 | return "EQUAL"; | ||
| 390 | case TessSpacing::FractionalOdd: | ||
| 391 | return "FRACTIONAL_ODD"; | ||
| 392 | case TessSpacing::FractionalEven: | ||
| 393 | return "FRACTIONAL_EVEN"; | ||
| 394 | } | ||
| 395 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 396 | } | ||
| 397 | } // Anonymous namespace | ||
| 398 | |||
| 399 | std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, | ||
| 400 | Bindings& bindings) { | ||
| 401 | EmitContext ctx{program, bindings, profile, runtime_info}; | ||
| 402 | Precolor(program); | ||
| 403 | EmitCode(ctx, program); | ||
| 404 | std::string header{StageHeader(program.stage)}; | ||
| 405 | SetupOptions(program, profile, runtime_info, header); | ||
| 406 | switch (program.stage) { | ||
| 407 | case Stage::TessellationControl: | ||
| 408 | header += fmt::format("VERTICES_OUT {};", program.invocations); | ||
| 409 | break; | ||
| 410 | case Stage::TessellationEval: | ||
| 411 | header += fmt::format("TESS_MODE {};" | ||
| 412 | "TESS_SPACING {};" | ||
| 413 | "TESS_VERTEX_ORDER {};", | ||
| 414 | GetTessMode(runtime_info.tess_primitive), | ||
| 415 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 416 | runtime_info.tess_clockwise ? "CW" : "CCW"); | ||
| 417 | break; | ||
| 418 | case Stage::Geometry: | ||
| 419 | header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology)); | ||
| 420 | if (program.is_geometry_passthrough) { | ||
| 421 | if (profile.support_geometry_shader_passthrough) { | ||
| 422 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 423 | if (program.info.passthrough.Generic(index)) { | ||
| 424 | header += fmt::format("PASSTHROUGH result.attrib[{}];", index); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 428 | header += "PASSTHROUGH result.position;"; | ||
| 429 | } | ||
| 430 | } else { | ||
| 431 | LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported"); | ||
| 432 | } | ||
| 433 | } else { | ||
| 434 | header += | ||
| 435 | fmt::format("VERTICES_OUT {};" | ||
| 436 | "PRIMITIVE_OUT {};", | ||
| 437 | program.output_vertices, OutputPrimitive(program.output_topology)); | ||
| 438 | } | ||
| 439 | break; | ||
| 440 | case Stage::Compute: | ||
| 441 | header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0], | ||
| 442 | program.workgroup_size[1], program.workgroup_size[2]); | ||
| 443 | break; | ||
| 444 | default: | ||
| 445 | break; | ||
| 446 | } | ||
| 447 | if (program.shared_memory_size > 0) { | ||
| 448 | header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size); | ||
| 449 | header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};"); | ||
| 450 | } | ||
| 451 | header += "TEMP "; | ||
| 452 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) { | ||
| 453 | header += fmt::format("R{},", index); | ||
| 454 | } | ||
| 455 | if (program.local_memory_size > 0) { | ||
| 456 | header += fmt::format("lmem[{}],", program.local_memory_size); | ||
| 457 | } | ||
| 458 | if (program.info.uses_fswzadd) { | ||
| 459 | header += "FSWZA[4],FSWZB[4],"; | ||
| 460 | } | ||
| 461 | const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)}; | ||
| 462 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 463 | header += fmt::format("loop{},", index); | ||
| 464 | } | ||
| 465 | header += "RC;" | ||
| 466 | "LONG TEMP "; | ||
| 467 | for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { | ||
| 468 | header += fmt::format("D{},", index); | ||
| 469 | } | ||
| 470 | header += "DC;"; | ||
| 471 | if (program.info.uses_fswzadd) { | ||
| 472 | header += "MOV.F FSWZA[0],-1;" | ||
| 473 | "MOV.F FSWZA[1],1;" | ||
| 474 | "MOV.F FSWZA[2],-1;" | ||
| 475 | "MOV.F FSWZA[3],0;" | ||
| 476 | "MOV.F FSWZB[0],-1;" | ||
| 477 | "MOV.F FSWZB[1],-1;" | ||
| 478 | "MOV.F FSWZB[2],1;" | ||
| 479 | "MOV.F FSWZB[3],-1;"; | ||
| 480 | } | ||
| 481 | for (u32 index = 0; index < num_safety_loop_vectors; ++index) { | ||
| 482 | header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index); | ||
| 483 | } | ||
| 484 | if (ctx.uses_y_direction) { | ||
| 485 | header += "PARAM y_direction[1]={state.material.front.ambient};"; | ||
| 486 | } | ||
| 487 | ctx.code.insert(0, header); | ||
| 488 | ctx.code += "END"; | ||
| 489 | return ctx.code; | ||
| 490 | } | ||
| 491 | |||
| 492 | } // namespace Shader::Backend::GLASM | ||
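`EmitInst`'s switch is generated with an X-macro over `opcodes.inc`. A self-contained sketch of the pattern, with a hypothetical two-entry list standing in for the real `.inc` file:

```cpp
#include <stdexcept>

// Stand-in for opcodes.inc: one entry per instruction.
#define OPCODE_LIST(V) V(IAdd32) V(FMul32)

enum class Opcode {
#define OPCODE(name) name,
    OPCODE_LIST(OPCODE)
#undef OPCODE
};

void EmitIAdd32() {}
void EmitFMul32() {}

// Re-expanding the list stamps out one case per opcode, so adding an entry
// to the list automatically extends the dispatcher.
void EmitInst(Opcode op) {
    switch (op) {
#define OPCODE(name)                                                           \
    case Opcode::name:                                                         \
        return Emit##name();
        OPCODE_LIST(OPCODE)
#undef OPCODE
    }
    throw std::logic_error("invalid opcode");
}
```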
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/bindings.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | #include "shader_recompiler/runtime_info.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLASM { | ||
| 15 | |||
| 16 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 17 | IR::Program& program, Bindings& bindings); | ||
| 18 | |||
| 19 | [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 20 | IR::Program& program) { | ||
| 21 | Bindings binding; | ||
| 22 | return EmitGLASM(profile, runtime_info, program, binding); | ||
| 23 | } | ||
| 24 | |||
| 25 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | |||
| 11 | static void Alias(IR::Inst& inst, const IR::Value& value) { | ||
| 12 | if (value.IsImmediate()) { | ||
| 13 | return; | ||
| 14 | } | ||
| 15 | IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())}; | ||
| 16 | value_inst.DestructiveAddUsage(inst.UseCount()); | ||
| 17 | value_inst.DestructiveRemoveUsage(); | ||
| 18 | inst.SetDefinition(value_inst.Definition<Id>()); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 22 | Alias(inst, value); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { | ||
| 26 | // Fake one usage to get a real register out of the condition | ||
| 27 | inst.DestructiveAddUsage(1); | ||
| 28 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 29 | const ScalarS32 input{ctx.reg_alloc.Consume(value)}; | ||
| 30 | if (ret != input) { | ||
| 31 | ctx.Add("MOV.S {},{};", ret, input); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 36 | Alias(inst, value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 40 | Alias(inst, value); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 44 | Alias(inst, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 48 | Alias(inst, value); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 52 | Alias(inst, value); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 56 | Alias(inst, value); | ||
| 57 | } | ||
| 58 | |||
| 59 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 60 | ctx.LongAdd("PK64.U {}.x,{};", inst, value); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 64 | ctx.Add("UP64.U {}.xy,{}.x;", inst, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 68 | throw NotImplementedException("GLASM instruction"); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 72 | throw NotImplementedException("GLASM instruction"); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | ctx.Add("PK2H {}.x,{};", inst, value); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 80 | ctx.Add("UP2H {}.xy,{}.x;", inst, value); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 84 | ctx.LongAdd("PK64 {}.x,{};", inst, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 88 | ctx.Add("UP64 {}.xy,{}.x;", inst, value); | ||
| 89 | } | ||
| 90 | |||
| 91 | } // namespace Shader::Backend::GLASM | ||
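`Alias` is the key trick in this file: bit casts never emit a `MOV`; the result simply adopts the source's register. A simplified model of the usage transfer (field names invented; the real code goes through `DestructiveAddUsage`, `DestructiveRemoveUsage`, and `SetDefinition`):

```cpp
// Simplified model of register aliasing for no-op casts. The cast's pending
// uses are transferred to the value's defining instruction so its register
// stays live, then one use is dropped because the cast itself no longer
// consumes the value; finally both share the same register definition.
struct Inst {
    int def_id = -1;   // allocated register, -1 if none yet
    int use_count = 0; // outstanding consumers
};

void Alias(Inst& cast, Inst& source) {
    source.use_count += cast.use_count; // cast's users now keep source alive
    source.use_count -= 1;              // the cast itself stops counting as a user
    cast.def_id = source.def_id;        // reads of the cast hit source's register
}
```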
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | template <auto read_imm, char type, typename... Values> | ||
| 12 | void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) { | ||
| 13 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 14 | if (std::ranges::any_of(std::array{elements...}, | ||
| 15 | [](const IR::Value& value) { return value.IsImmediate(); })) { | ||
| 16 | using Type = std::invoke_result_t<decltype(read_imm), IR::Value>; | ||
| 17 | const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...}; | ||
| 18 | ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]), | ||
| 19 | fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3])); | ||
| 20 | } | ||
| 21 | size_t index{}; | ||
| 22 | for (const IR::Value& element : {elements...}) { | ||
| 23 | if (!element.IsImmediate()) { | ||
| 24 | const ScalarU32 value{ctx.reg_alloc.Consume(element)}; | ||
| 25 | ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value); | ||
| 26 | } | ||
| 27 | ++index; | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) { | ||
| 32 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 33 | if (ret == composite && index == 0) { | ||
| 34 | // No need to do anything here, the source and destination are the same register | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]); | ||
| 38 | } | ||
| 39 | |||
| 40 | template <typename ObjectType> | ||
| 41 | void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object, | ||
| 42 | u32 index, char type) { | ||
| 43 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 44 | const char swizzle{"xyzw"[index]}; | ||
| 45 | if (ret != composite && ret == object) { | ||
| 46 | // The object is aliased with the return value, so we have to use a temporary to insert | ||
| 47 | ctx.Add("MOV.{} RC,{};" | ||
| 48 | "MOV.{} RC.{},{};" | ||
| 49 | "MOV.{} {},RC;", | ||
| 50 | type, composite, type, swizzle, object, type, ret); | ||
| 51 | } else if (ret != composite) { | ||
| 52 | // The input composite is not aliased with the return value, so we have to copy it | ||
| 53 | // beforehand. But the insert object is not aliased with the return value, so we don't | ||
| 54 | // have to worry about that. | ||
| 55 | ctx.Add("MOV.{} {},{};" | ||
| 56 | "MOV.{} {}.{},{};", | ||
| 57 | type, ret, composite, type, ret, swizzle, object); | ||
| 58 | } else { | ||
| 59 | // The return value is aliased with the composite, so we can just insert the object; | ||
| 60 | // it doesn't matter whether the object is aliased as well | ||
| 61 | ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 67 | const IR::Value& e2) { | ||
| 68 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 72 | const IR::Value& e2, const IR::Value& e3) { | ||
| 73 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3); | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 77 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { | ||
| 78 | CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 82 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 86 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 90 | CompositeExtract(ctx, inst, composite, index, 'U'); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx, | ||
| 94 | [[maybe_unused]] Register composite, | ||
| 95 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 96 | throw NotImplementedException("GLASM instruction"); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx, | ||
| 100 | [[maybe_unused]] Register composite, | ||
| 101 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 102 | throw NotImplementedException("GLASM instruction"); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx, | ||
| 106 | [[maybe_unused]] Register composite, | ||
| 107 | [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) { | ||
| 108 | throw NotImplementedException("GLASM instruction"); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 112 | [[maybe_unused]] Register e2) { | ||
| 113 | throw NotImplementedException("GLASM instruction"); | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 117 | [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) { | ||
| 118 | throw NotImplementedException("GLASM instruction"); | ||
| 119 | } | ||
| 120 | |||
| 121 | void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1, | ||
| 122 | [[maybe_unused]] Register e2, [[maybe_unused]] Register e3, | ||
| 123 | [[maybe_unused]] Register e4) { | ||
| 124 | throw NotImplementedException("GLASM instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 128 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 129 | throw NotImplementedException("GLASM instruction"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 133 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 134 | throw NotImplementedException("GLASM instruction"); | ||
| 135 | } | ||
| 136 | |||
| 137 | void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 138 | [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) { | ||
| 139 | throw NotImplementedException("GLASM instruction"); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 143 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 144 | [[maybe_unused]] u32 index) { | ||
| 145 | throw NotImplementedException("GLASM instruction"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 149 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 150 | [[maybe_unused]] u32 index) { | ||
| 151 | throw NotImplementedException("GLASM instruction"); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 155 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 156 | [[maybe_unused]] u32 index) { | ||
| 157 | throw NotImplementedException("GLASM instruction"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 161 | const IR::Value& e2) { | ||
| 162 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2); | ||
| 163 | } | ||
| 164 | |||
| 165 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 166 | const IR::Value& e2, const IR::Value& e3) { | ||
| 167 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3); | ||
| 168 | } | ||
| 169 | |||
| 170 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 171 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) { | ||
| 172 | CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 176 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 180 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) { | ||
| 184 | CompositeExtract(ctx, inst, composite, index, 'F'); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 188 | ScalarF32 object, u32 index) { | ||
| 189 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 193 | ScalarF32 object, u32 index) { | ||
| 194 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 198 | ScalarF32 object, u32 index) { | ||
| 199 | CompositeInsert(ctx, inst, composite, object, index, 'F'); | ||
| 200 | } | ||
| 201 | |||
| 202 | void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 203 | throw NotImplementedException("GLASM instruction"); | ||
| 204 | } | ||
| 205 | |||
| 206 | void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 207 | throw NotImplementedException("GLASM instruction"); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 215 | throw NotImplementedException("GLASM instruction"); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 219 | throw NotImplementedException("GLASM instruction"); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 223 | throw NotImplementedException("GLASM instruction"); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx, | ||
| 227 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 228 | [[maybe_unused]] u32 index) { | ||
| 229 | throw NotImplementedException("GLASM instruction"); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx, | ||
| 233 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 234 | [[maybe_unused]] u32 index) { | ||
| 235 | throw NotImplementedException("GLASM instruction"); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx, | ||
| 239 | [[maybe_unused]] Register composite, [[maybe_unused]] Register object, | ||
| 240 | [[maybe_unused]] u32 index) { | ||
| 241 | throw NotImplementedException("GLASM instruction"); | ||
| 242 | } | ||
| 243 | |||
| 244 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp new file mode 100644 index 000000000..02c9dc6d7 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp | |||
| @@ -0,0 +1,346 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 16 | std::string_view size) { | ||
| 17 | if (!binding.IsImmediate()) { | ||
| 18 | throw NotImplementedException("Indirect constant buffer loading"); | ||
| 19 | } | ||
| 20 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 21 | if (offset.type == Type::U32) { | ||
| 22 | // Avoid reading arrays out of bounds, matching hardware's behavior | ||
| 23 | if (offset.imm_u32 >= 0x10'000) { | ||
| 24 | ctx.Add("MOV.S {},0;", ret); | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset); | ||
| 29 | } | ||
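As a sketch, an immediate binding of 1, an immediate offset of 32 and size "U32" (return register illustrative) make the helper above emit

    LDC.U32 R0,c1[32];

while an immediate offset at or beyond 0x10000 short-circuits to MOV.S R0,0; to match the out-of-bounds hardware behavior noted in the comment.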
| 30 | |||
| 31 | bool IsInputArray(Stage stage) { | ||
| 32 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 33 | stage == Stage::TessellationEval; | ||
| 34 | } | ||
| 35 | |||
| 36 | std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) { | ||
| 37 | return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; | ||
| 38 | } | ||
| 39 | |||
| 40 | u32 TexCoordIndex(IR::Attribute attr) { | ||
| 41 | return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; | ||
| 42 | } | ||
| 43 | } // Anonymous namespace | ||
| 44 | |||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 46 | GetCbuf(ctx, inst, binding, offset, "U8"); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 50 | GetCbuf(ctx, inst, binding, offset, "S8"); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 54 | GetCbuf(ctx, inst, binding, offset, "U16"); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 58 | GetCbuf(ctx, inst, binding, offset, "S16"); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 62 | GetCbuf(ctx, inst, binding, offset, "U32"); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) { | ||
| 66 | GetCbuf(ctx, inst, binding, offset, "F32"); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 70 | ScalarU32 offset) { | ||
| 71 | GetCbuf(ctx, inst, binding, offset, "U32X2"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) { | ||
| 75 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 76 | const char swizzle{"xyzw"[element]}; | ||
| 77 | if (IR::IsGeneric(attr)) { | ||
| 78 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 79 | ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle); | ||
| 80 | return; | ||
| 81 | } | ||
| 82 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 83 | const u32 index{TexCoordIndex(attr)}; | ||
| 84 | ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle); | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | switch (attr) { | ||
| 88 | case IR::Attribute::PrimitiveId: | ||
| 89 | ctx.Add("MOV.S {}.x,primitive.id;", inst); | ||
| 90 | break; | ||
| 91 | case IR::Attribute::PositionX: | ||
| 92 | case IR::Attribute::PositionY: | ||
| 93 | case IR::Attribute::PositionZ: | ||
| 94 | case IR::Attribute::PositionW: | ||
| 95 | if (IsInputArray(ctx.stage)) { | ||
| 96 | ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle); | ||
| 97 | } else { | ||
| 98 | ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle); | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 102 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 103 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 104 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 105 | ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle); | ||
| 106 | break; | ||
| 107 | case IR::Attribute::PointSpriteS: | ||
| 108 | case IR::Attribute::PointSpriteT: | ||
| 109 | ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle); | ||
| 110 | break; | ||
| 111 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 112 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 113 | ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle); | ||
| 114 | break; | ||
| 115 | case IR::Attribute::InstanceId: | ||
| 116 | ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name); | ||
| 117 | break; | ||
| 118 | case IR::Attribute::VertexId: | ||
| 119 | ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name); | ||
| 120 | break; | ||
| 121 | case IR::Attribute::FrontFace: | ||
| 122 | ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name); | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | throw NotImplementedException("Get attribute {}", attr); | ||
| 126 | } | ||
| 127 | } | ||
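As a sketch, reading the y component of generic attribute 4 in a fragment shader (inputs are not arrayed there, so VertexIndex contributes nothing) emits roughly

    MOV.F R0.x,in_attr4[0].y;

with R0 standing in for whatever register the instruction is allocated.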
| 128 | |||
| 129 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, | ||
| 130 | [[maybe_unused]] ScalarU32 vertex) { | ||
| 131 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 132 | const char swizzle{"xyzw"[element]}; | ||
| 133 | if (IR::IsGeneric(attr)) { | ||
| 134 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 135 | ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value); | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9R) { | ||
| 139 | const u32 index{TexCoordIndex(attr)}; | ||
| 140 | ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | switch (attr) { | ||
| 144 | case IR::Attribute::Layer: | ||
| 145 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 146 | ctx.Add("MOV.F result.layer.x,{};", value); | ||
| 147 | } else { | ||
| 148 | LOG_WARNING(Shader_GLASM, | ||
| 149 | "Layer stored outside of geometry shader not supported by device"); | ||
| 150 | } | ||
| 151 | break; | ||
| 152 | case IR::Attribute::ViewportIndex: | ||
| 153 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 154 | ctx.Add("MOV.F result.viewport.x,{};", value); | ||
| 155 | } else { | ||
| 156 | LOG_WARNING(Shader_GLASM, | ||
| 157 | "Viewport stored outside of geometry shader not supported by device"); | ||
| 158 | } | ||
| 159 | break; | ||
| 160 | case IR::Attribute::ViewportMask: | ||
| 161 | // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. | ||
| 162 | if (ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 163 | ctx.Add("MOV.F result.viewportmask[0].x,{};", value); | ||
| 164 | } else { | ||
| 165 | LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); | ||
| 166 | } | ||
| 167 | break; | ||
| 168 | case IR::Attribute::PointSize: | ||
| 169 | ctx.Add("MOV.F result.pointsize.x,{};", value); | ||
| 170 | break; | ||
| 171 | case IR::Attribute::PositionX: | ||
| 172 | case IR::Attribute::PositionY: | ||
| 173 | case IR::Attribute::PositionZ: | ||
| 174 | case IR::Attribute::PositionW: | ||
| 175 | ctx.Add("MOV.F result.position.{},{};", swizzle, value); | ||
| 176 | break; | ||
| 177 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 178 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 179 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 180 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 181 | ctx.Add("MOV.F result.color.{},{};", swizzle, value); | ||
| 182 | break; | ||
| 183 | case IR::Attribute::ColorFrontSpecularR: | ||
| 184 | case IR::Attribute::ColorFrontSpecularG: | ||
| 185 | case IR::Attribute::ColorFrontSpecularB: | ||
| 186 | case IR::Attribute::ColorFrontSpecularA: | ||
| 187 | ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); | ||
| 188 | break; | ||
| 189 | case IR::Attribute::ColorBackDiffuseR: | ||
| 190 | case IR::Attribute::ColorBackDiffuseG: | ||
| 191 | case IR::Attribute::ColorBackDiffuseB: | ||
| 192 | case IR::Attribute::ColorBackDiffuseA: | ||
| 193 | ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); | ||
| 194 | break; | ||
| 195 | case IR::Attribute::ColorBackSpecularR: | ||
| 196 | case IR::Attribute::ColorBackSpecularG: | ||
| 197 | case IR::Attribute::ColorBackSpecularB: | ||
| 198 | case IR::Attribute::ColorBackSpecularA: | ||
| 199 | ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); | ||
| 200 | break; | ||
| 201 | case IR::Attribute::FogCoordinate: | ||
| 202 | ctx.Add("MOV.F result.fogcoord.x,{};", value); | ||
| 203 | break; | ||
| 204 | case IR::Attribute::ClipDistance0: | ||
| 205 | case IR::Attribute::ClipDistance1: | ||
| 206 | case IR::Attribute::ClipDistance2: | ||
| 207 | case IR::Attribute::ClipDistance3: | ||
| 208 | case IR::Attribute::ClipDistance4: | ||
| 209 | case IR::Attribute::ClipDistance5: | ||
| 210 | case IR::Attribute::ClipDistance6: | ||
| 211 | case IR::Attribute::ClipDistance7: { | ||
| 212 | const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 213 | ctx.Add("MOV.F result.clip[{}].x,{};", index, value); | ||
| 214 | break; | ||
| 215 | } | ||
| 216 | default: | ||
| 217 | throw NotImplementedException("Set attribute {}", attr); | ||
| 218 | } | ||
| 219 | } | ||
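As a sketch, a store to ClipDistance3 from an illustrative source register R0.x lands in the fourth clip plane output:

    MOV.F result.clip[3].x,R0.x;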
| 220 | |||
| 221 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { | ||
| 222 | // RC.x = base_index | ||
| 223 | // RC.y = masked_index | ||
| 224 | // RC.z = compare_index | ||
| 225 | ctx.Add("SHR.S RC.x,{},2;" | ||
| 226 | "AND.S RC.y,RC.x,3;" | ||
| 227 | "SHR.S RC.z,{},4;", | ||
| 228 | offset, offset); | ||
| 229 | |||
| 230 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 231 | u32 num_endifs{}; | ||
| 232 | const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) { | ||
| 233 | ++num_endifs; | ||
| 234 | ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index | ||
| 235 | "IF NE.w;" | ||
| 236 | // X | ||
| 237 | "SEQ.S.CC RC.w,RC.y,0;" | ||
| 238 | "IF NE.w;" | ||
| 239 | "MOV {}.x,{};" | ||
| 240 | "ELSE;" | ||
| 241 | // Y | ||
| 242 | "SEQ.S.CC RC.w,RC.y,1;" | ||
| 243 | "IF NE.w;" | ||
| 244 | "MOV {}.x,{};" | ||
| 245 | "ELSE;" | ||
| 246 | // Z | ||
| 247 | "SEQ.S.CC RC.w,RC.y,2;" | ||
| 248 | "IF NE.w;" | ||
| 249 | "MOV {}.x,{};" | ||
| 250 | "ELSE;" | ||
| 251 | // W | ||
| 252 | "MOV {}.x,{};" | ||
| 253 | "ENDIF;" | ||
| 254 | "ENDIF;" | ||
| 255 | "ENDIF;" | ||
| 256 | "ELSE;", | ||
| 257 | compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); | ||
| 258 | }}; | ||
| 259 | const auto read_swizzled{[&](u32 compare_index, std::string_view value) { | ||
| 260 | const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), | ||
| 261 | fmt::format("{}.z", value), fmt::format("{}.w", value)}; | ||
| 262 | read(compare_index, values); | ||
| 263 | }}; | ||
| 264 | if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 265 | const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; | ||
| 266 | if (IsInputArray(ctx.stage)) { | ||
| 267 | read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); | ||
| 268 | } else { | ||
| 269 | read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { | ||
| 273 | if (!ctx.info.loads.Generic(index)) { | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); | ||
| 277 | } | ||
| 278 | for (u32 i = 0; i < num_endifs; ++i) { | ||
| 279 | ctx.Add("ENDIF;"); | ||
| 280 | } | ||
| 281 | } | ||
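A worked example of the index decomposition above, for a hypothetical byte offset of 0x84 (generic attribute 8, component y): RC.x = 0x84 >> 2 = 33 is the word index, RC.y = 33 & 3 = 1 selects the y component, and RC.z = 0x84 >> 4 = 8 is the attribute index matched against each candidate read.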
| 282 | |||
| 283 | void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, | ||
| 284 | [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { | ||
| 285 | throw NotImplementedException("GLASM instruction"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { | ||
| 289 | if (!IR::IsGeneric(patch)) { | ||
| 290 | throw NotImplementedException("Non-generic patch load"); | ||
| 291 | } | ||
| 292 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 293 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 294 | const char swizzle{"xyzw"[element]}; | ||
| 295 | const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""}; | ||
| 296 | ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { | ||
| 300 | if (IR::IsGeneric(patch)) { | ||
| 301 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 302 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 303 | ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); | ||
| 304 | return; | ||
| 305 | } | ||
| 306 | switch (patch) { | ||
| 307 | case IR::Patch::TessellationLodLeft: | ||
| 308 | case IR::Patch::TessellationLodRight: | ||
| 309 | case IR::Patch::TessellationLodTop: | ||
| 310 | case IR::Patch::TessellationLodBottom: { | ||
| 311 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 312 | ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | case IR::Patch::TessellationLodInteriorU: | ||
| 316 | ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); | ||
| 317 | break; | ||
| 318 | case IR::Patch::TessellationLodInteriorV: | ||
| 319 | ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); | ||
| 320 | break; | ||
| 321 | default: | ||
| 322 | throw NotImplementedException("Patch {}", patch); | ||
| 323 | } | ||
| 324 | } | ||
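As a sketch, assuming the IR orders the outer levels Left, Right, Top, Bottom, a write to TessellationLodTop from an illustrative source R0.x emits

    MOV.F result.patch.tessouter[2].x,R0.x;

and the two interior levels map to tessinner[0] and tessinner[1] as above.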
| 325 | |||
| 326 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { | ||
| 327 | ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); | ||
| 328 | } | ||
| 329 | |||
| 330 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { | ||
| 331 | ctx.Add("MOV.S result.samplemask.x,{};", value); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { | ||
| 335 | ctx.Add("MOV.F result.depth.z,{};", value); | ||
| 336 | } | ||
| 337 | |||
| 338 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { | ||
| 339 | ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { | ||
| 343 | ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); | ||
| 344 | } | ||
| 345 | |||
| 346 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp | |||
| @@ -0,0 +1,231 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | std::string_view FpRounding(IR::FpRounding fp_rounding) { | ||
| 15 | switch (fp_rounding) { | ||
| 16 | case IR::FpRounding::DontCare: | ||
| 17 | return ""; | ||
| 18 | case IR::FpRounding::RN: | ||
| 19 | return ".ROUND"; | ||
| 20 | case IR::FpRounding::RZ: | ||
| 21 | return ".TRUNC"; | ||
| 22 | case IR::FpRounding::RM: | ||
| 23 | return ".FLR"; | ||
| 24 | case IR::FpRounding::RP: | ||
| 25 | return ".CEIL"; | ||
| 26 | } | ||
| 27 | throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); | ||
| 28 | } | ||
| 29 | |||
| 30 | template <typename InputType> | ||
| 31 | void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, | ||
| 32 | std::string_view src, bool is_long_result) { | ||
| 33 | const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; | ||
| 34 | const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; | ||
| 35 | ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); | ||
| 36 | } | ||
| 37 | } // Anonymous namespace | ||
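As a sketch, EmitConvertS32F32 with round-toward-zero (IR::FpRounding::RZ) and illustrative registers emits

    CVT.S32.F32.TRUNC R0.x,R1.x;

and 64-bit destinations (the is_long_result callers below) go through LongDefine so the result lands in a long register.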
| 38 | |||
| 39 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 40 | Convert(ctx, inst, value, "S16", "F16", false); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 44 | Convert(ctx, inst, value, "S16", "F32", false); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 48 | Convert(ctx, inst, value, "S16", "F64", false); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 52 | Convert(ctx, inst, value, "S32", "F16", false); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 56 | Convert(ctx, inst, value, "S32", "F32", false); | ||
| 57 | } | ||
| 58 | |||
| 59 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 60 | Convert(ctx, inst, value, "S32", "F64", false); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 64 | Convert(ctx, inst, value, "S64", "F16", true); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 68 | Convert(ctx, inst, value, "S64", "F32", true); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 72 | Convert(ctx, inst, value, "S64", "F64", true); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | Convert(ctx, inst, value, "U16", "F16", false); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 80 | Convert(ctx, inst, value, "U16", "F32", false); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 84 | Convert(ctx, inst, value, "U16", "F64", false); | ||
| 85 | } | ||
| 86 | |||
| 87 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 88 | Convert(ctx, inst, value, "U32", "F16", false); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 92 | Convert(ctx, inst, value, "U32", "F32", false); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 96 | Convert(ctx, inst, value, "U32", "F64", false); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 100 | Convert(ctx, inst, value, "U64", "F16", true); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 104 | Convert(ctx, inst, value, "U64", "F32", true); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 108 | Convert(ctx, inst, value, "U64", "F64", true); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 112 | Convert(ctx, inst, value, "U64", "U32", true); | ||
| 113 | } | ||
| 114 | |||
| 115 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 116 | Convert(ctx, inst, value, "U32", "U64", false); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 120 | Convert(ctx, inst, value, "F16", "F32", false); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 124 | Convert(ctx, inst, value, "F32", "F16", false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 128 | Convert(ctx, inst, value, "F32", "F64", false); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 132 | Convert(ctx, inst, value, "F64", "F32", true); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 136 | Convert(ctx, inst, value, "F16", "S8", false); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 140 | Convert(ctx, inst, value, "F16", "S16", false); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 144 | Convert(ctx, inst, value, "F16", "S32", false); | ||
| 145 | } | ||
| 146 | |||
| 147 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 148 | Convert(ctx, inst, value, "F16", "S64", false); | ||
| 149 | } | ||
| 150 | |||
| 151 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 152 | Convert(ctx, inst, value, "F16", "U8", false); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 156 | Convert(ctx, inst, value, "F16", "U16", false); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 160 | Convert(ctx, inst, value, "F16", "U32", false); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 164 | Convert(ctx, inst, value, "F16", "U64", false); | ||
| 165 | } | ||
| 166 | |||
| 167 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 168 | Convert(ctx, inst, value, "F32", "S8", false); | ||
| 169 | } | ||
| 170 | |||
| 171 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 172 | Convert(ctx, inst, value, "F32", "S16", false); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 176 | Convert(ctx, inst, value, "F32", "S32", false); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 180 | Convert(ctx, inst, value, "F32", "S64", false); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 184 | Convert(ctx, inst, value, "F32", "U8", false); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 188 | Convert(ctx, inst, value, "F32", "U16", false); | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 192 | Convert(ctx, inst, value, "F32", "U32", false); | ||
| 193 | } | ||
| 194 | |||
| 195 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 196 | Convert(ctx, inst, value, "F32", "U64", false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 200 | Convert(ctx, inst, value, "F64", "S8", true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 204 | Convert(ctx, inst, value, "F64", "S16", true); | ||
| 205 | } | ||
| 206 | |||
| 207 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 208 | Convert(ctx, inst, value, "F64", "S32", true); | ||
| 209 | } | ||
| 210 | |||
| 211 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 212 | Convert(ctx, inst, value, "F64", "S64", true); | ||
| 213 | } | ||
| 214 | |||
| 215 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 216 | Convert(ctx, inst, value, "F64", "U8", true); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 220 | Convert(ctx, inst, value, "F64", "U16", true); | ||
| 221 | } | ||
| 222 | |||
| 223 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 224 | Convert(ctx, inst, value, "F64", "U32", true); | ||
| 225 | } | ||
| 226 | |||
| 227 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 228 | Convert(ctx, inst, value, "F64", "U64", true); | ||
| 229 | } | ||
| 230 | |||
| 231 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | template <typename InputType> | ||
| 15 | void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, | ||
| 16 | std::string_view type, bool ordered, bool inequality = false) { | ||
| 17 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 18 | ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); | ||
| 19 | if (ordered && inequality) { | ||
| 20 | ctx.Add("SEQ.{} RC.y,{},{};" | ||
| 21 | "SEQ.{} RC.z,{},{};" | ||
| 22 | "AND.U RC.x,RC.x,RC.y;" | ||
| 23 | "AND.U RC.x,RC.x,RC.z;" | ||
| 24 | "SNE.S {}.x,RC.x,0;", | ||
| 25 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 26 | } else if (ordered) { | ||
| 27 | ctx.Add("SNE.S {}.x,RC.x,0;", ret); | ||
| 28 | } else { | ||
| 29 | ctx.Add("SNE.{} RC.y,{},{};" | ||
| 30 | "SNE.{} RC.z,{},{};" | ||
| 31 | "OR.U RC.x,RC.x,RC.y;" | ||
| 32 | "OR.U RC.x,RC.x,RC.z;" | ||
| 33 | "SNE.S {}.x,RC.x,0;", | ||
| 34 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 35 | } | ||
| 36 | } | ||
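As a sketch, the ordered not-equal path for F32 with illustrative registers expands to

    SNE.F RC.x,R1.x,R2.x;   # raw comparison
    SEQ.F RC.y,R1.x,R1.x;   # lhs is ordered (not NaN)
    SEQ.F RC.z,R2.x,R2.x;   # rhs is ordered (not NaN)
    AND.U RC.x,RC.x,RC.y;
    AND.U RC.x,RC.x,RC.z;
    SNE.S R0.x,RC.x,0;      # normalize to a boolean result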
| 37 | |||
| 38 | template <typename InputType> | ||
| 39 | void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, | ||
| 40 | InputType max_value, std::string_view type) { | ||
| 41 | // Call MAX first so a NaN input clamps to min_value instead of propagating | ||
| 42 | ctx.Add("MAX.{} RC.x,{},{};" | ||
| 43 | "MIN.{} {}.x,RC.x,{};", | ||
| 44 | type, min_value, value, type, ret, max_value); | ||
| 45 | } | ||
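As a sketch, clamping an illustrative value R1.x between R2.x and R3.x emits

    MAX.F RC.x,R2.x,R1.x;
    MIN.F R0.x,RC.x,R3.x;

so a NaN value resolves to min_value at the MAX step rather than propagating through the whole clamp.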
| 46 | |||
| 47 | std::string_view Precise(IR::Inst& inst) { | ||
| 48 | const bool precise{inst.Flags<IR::FpControl>().no_contraction}; | ||
| 49 | return precise ? ".PREC" : ""; | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 54 | [[maybe_unused]] Register value) { | ||
| 55 | throw NotImplementedException("GLASM instruction"); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 59 | ctx.Add("MOV.F {}.x,|{}|;", inst, value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 63 | ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 67 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 68 | throw NotImplementedException("GLASM instruction"); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 72 | ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 76 | ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 80 | [[maybe_unused]] Register a, [[maybe_unused]] Register b, | ||
| 81 | [[maybe_unused]] Register c) { | ||
| 82 | throw NotImplementedException("GLASM instruction"); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { | ||
| 86 | ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { | ||
| 90 | ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 94 | ctx.Add("MAX.F {}.x,{},{};", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 98 | ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 102 | ctx.Add("MIN.F {}.x,{},{};", inst, a, b); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 106 | ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 110 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 111 | throw NotImplementedException("GLASM instruction"); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 115 | ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 119 | ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 123 | throw NotImplementedException("GLASM instruction"); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { | ||
| 127 | ctx.Add("MOV.F {}.x,-{};", inst, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 131 | ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 135 | ctx.Add("SIN {}.x,{};", inst, value); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 139 | ctx.Add("COS {}.x,{};", inst, value); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 143 | ctx.Add("EX2 {}.x,{};", inst, value); | ||
| 144 | } | ||
| 145 | |||
| 146 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 147 | ctx.Add("LG2 {}.x,{};", inst, value); | ||
| 148 | } | ||
| 149 | |||
| 150 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 151 | ctx.Add("RCP {}.x,{};", inst, value); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 155 | throw NotImplementedException("GLASM instruction"); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 159 | ctx.Add("RSQ {}.x,{};", inst, value); | ||
| 160 | } | ||
| 161 | |||
| 162 | void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 163 | throw NotImplementedException("GLASM instruction"); | ||
| 164 | } | ||
| 165 | |||
| 166 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 167 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 168 | ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); | ||
| 169 | } | ||
| 170 | |||
| 171 | void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 172 | throw NotImplementedException("GLASM instruction"); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 176 | ctx.Add("MOV.F.SAT {}.x,{};", inst, value); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 180 | throw NotImplementedException("GLASM instruction"); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, | ||
| 184 | [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { | ||
| 185 | throw NotImplementedException("GLASM instruction"); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 189 | ScalarF32 max_value) { | ||
| 190 | Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 194 | ScalarF64 max_value) { | ||
| 195 | Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); | ||
| 196 | } | ||
| 197 | |||
| 198 | void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 199 | throw NotImplementedException("GLASM instruction"); | ||
| 200 | } | ||
| 201 | |||
| 202 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 203 | ctx.Add("ROUND.F {}.x,{};", inst, value); | ||
| 204 | } | ||
| 205 | |||
| 206 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 207 | ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 215 | ctx.Add("FLR.F {}.x,{};", inst, value); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 219 | ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 223 | throw NotImplementedException("GLASM instruction"); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 227 | ctx.Add("CEIL.F {}.x,{};", inst, value); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 231 | ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 235 | throw NotImplementedException("GLASM instruction"); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 239 | ctx.Add("TRUNC.F {}.x,{};", inst, value); | ||
| 240 | } | ||
| 241 | |||
| 242 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 243 | ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); | ||
| 244 | } | ||
| 245 | |||
| 246 | void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 247 | [[maybe_unused]] Register rhs) { | ||
| 248 | throw NotImplementedException("GLASM instruction"); | ||
| 249 | } | ||
| 250 | |||
| 251 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 252 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); | ||
| 253 | } | ||
| 254 | |||
| 255 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 256 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 260 | [[maybe_unused]] Register rhs) { | ||
| 261 | throw NotImplementedException("GLASM instruction"); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 265 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 269 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 273 | [[maybe_unused]] Register rhs) { | ||
| 274 | throw NotImplementedException("GLASM instruction"); | ||
| 275 | } | ||
| 276 | |||
| 277 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 278 | Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); | ||
| 279 | } | ||
| 280 | |||
| 281 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 282 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); | ||
| 283 | } | ||
| 284 | |||
| 285 | void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 286 | [[maybe_unused]] Register rhs) { | ||
| 287 | throw NotImplementedException("GLASM instruction"); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 291 | Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 295 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 299 | [[maybe_unused]] Register rhs) { | ||
| 300 | throw NotImplementedException("GLASM instruction"); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 304 | Compare(ctx, inst, lhs, rhs, "SLT", "F", true); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 308 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); | ||
| 309 | } | ||
| 310 | |||
| 311 | void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 312 | [[maybe_unused]] Register rhs) { | ||
| 313 | throw NotImplementedException("GLASM instruction"); | ||
| 314 | } | ||
| 315 | |||
| 316 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 317 | Compare(ctx, inst, lhs, rhs, "SLT", "F", false); | ||
| 318 | } | ||
| 319 | |||
| 320 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 321 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 325 | [[maybe_unused]] Register rhs) { | ||
| 326 | throw NotImplementedException("GLASM instruction"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 330 | Compare(ctx, inst, lhs, rhs, "SGT", "F", true); | ||
| 331 | } | ||
| 332 | |||
| 333 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 334 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); | ||
| 335 | } | ||
| 336 | |||
| 337 | void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 338 | [[maybe_unused]] Register rhs) { | ||
| 339 | throw NotImplementedException("GLASM instruction"); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 343 | Compare(ctx, inst, lhs, rhs, "SGT", "F", false); | ||
| 344 | } | ||
| 345 | |||
| 346 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 347 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); | ||
| 348 | } | ||
| 349 | |||
| 350 | void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 351 | [[maybe_unused]] Register rhs) { | ||
| 352 | throw NotImplementedException("GLASM instruction"); | ||
| 353 | } | ||
| 354 | |||
| 355 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 356 | Compare(ctx, inst, lhs, rhs, "SLE", "F", true); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 360 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 364 | [[maybe_unused]] Register rhs) { | ||
| 365 | throw NotImplementedException("GLASM instruction"); | ||
| 366 | } | ||
| 367 | |||
| 368 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 369 | Compare(ctx, inst, lhs, rhs, "SLE", "F", false); | ||
| 370 | } | ||
| 371 | |||
| 372 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 373 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); | ||
| 374 | } | ||
| 375 | |||
| 376 | void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 377 | [[maybe_unused]] Register rhs) { | ||
| 378 | throw NotImplementedException("GLASM instruction"); | ||
| 379 | } | ||
| 380 | |||
| 381 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 382 | Compare(ctx, inst, lhs, rhs, "SGE", "F", true); | ||
| 383 | } | ||
| 384 | |||
| 385 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 386 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 390 | [[maybe_unused]] Register rhs) { | ||
| 391 | throw NotImplementedException("GLASM instruction"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 395 | Compare(ctx, inst, lhs, rhs, "SGE", "F", false); | ||
| 396 | } | ||
| 397 | |||
| 398 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 399 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); | ||
| 400 | } | ||
| 401 | |||
| 402 | void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 403 | throw NotImplementedException("GLASM instruction"); | ||
| 404 | } | ||
| 405 | |||
| 406 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 407 | Compare(ctx, inst, value, value, "SNE", "F", true, false); | ||
| 408 | } | ||
| 409 | |||
| 410 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 411 | Compare(ctx, inst, value, value, "SNE", "F64", true, false); | ||
| 412 | } | ||
| 413 | |||
| 414 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -0,0 +1,850 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | struct ScopedRegister { | ||
| 15 | ScopedRegister() = default; | ||
| 16 | ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} | ||
| 17 | |||
| 18 | ~ScopedRegister() { | ||
| 19 | if (reg_alloc) { | ||
| 20 | reg_alloc->FreeReg(reg); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { | ||
| 25 | if (reg_alloc) { | ||
| 26 | reg_alloc->FreeReg(reg); | ||
| 27 | } | ||
| 28 | reg_alloc = std::exchange(rhs.reg_alloc, nullptr); | ||
| 29 | reg = rhs.reg; | ||
| 30 | return *this; | ||
| 31 | } | ||
| 32 | |||
| 33 | ScopedRegister(ScopedRegister&& rhs) noexcept | ||
| 34 | : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} | ||
| 35 | |||
| 36 | ScopedRegister& operator=(const ScopedRegister&) = delete; | ||
| 37 | ScopedRegister(const ScopedRegister&) = delete; | ||
| 38 | |||
| 39 | RegAlloc* reg_alloc{}; | ||
| 40 | Register reg; | ||
| 41 | }; | ||
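A minimal usage sketch of the RAII helper above; EmitFoo and the emitted MOV are hypothetical, while ScopedRegister, RegAlloc::AllocReg and RegAlloc::FreeReg come from this file:

    void EmitFoo(EmitContext& ctx) {
        // Takes a scratch register via RegAlloc::AllocReg() on construction
        ScopedRegister tmp{ctx.reg_alloc};
        ctx.Add("MOV.F {}.x,0;", tmp.reg);
        // RegAlloc::FreeReg() runs automatically when tmp leaves scope,
        // including on early returns and thrown exceptions
    }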
| 42 | |||
| 43 | std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 44 | [[maybe_unused]] const IR::Value& index) { | ||
| 45 | // FIXME: indexed reads | ||
| 46 | if (info.type == TextureType::Buffer) { | ||
| 47 | return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); | ||
| 48 | } else { | ||
| 49 | return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | std::string Image(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 54 | [[maybe_unused]] const IR::Value& index) { | ||
| 55 | // FIXME: indexed reads | ||
| 56 | if (info.type == TextureType::Buffer) { | ||
| 57 | return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); | ||
| 58 | } else { | ||
| 59 | return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view TextureType(IR::TextureInstInfo info) { | ||
| 64 | if (info.is_depth) { | ||
| 65 | switch (info.type) { | ||
| 66 | case TextureType::Color1D: | ||
| 67 | return "SHADOW1D"; | ||
| 68 | case TextureType::ColorArray1D: | ||
| 69 | return "SHADOWARRAY1D"; | ||
| 70 | case TextureType::Color2D: | ||
| 71 | return "SHADOW2D"; | ||
| 72 | case TextureType::ColorArray2D: | ||
| 73 | return "SHADOWARRAY2D"; | ||
| 74 | case TextureType::Color3D: | ||
| 75 | return "SHADOW3D"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "SHADOWCUBE"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "SHADOWARRAYCUBE"; | ||
| 80 | case TextureType::Buffer: | ||
| 81 | return "SHADOWBUFFER"; | ||
| 82 | } | ||
| 83 | } else { | ||
| 84 | switch (info.type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "ARRAY1D"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "ARRAY2D"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "CUBE"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "ARRAYCUBE"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "BUFFER"; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | throw InvalidArgument("Invalid texture type {}", info.type.Value()); | ||
| 104 | } | ||
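In other words, the returned string is the GLASM texture target, with depth-compare samplers selecting the SHADOW-prefixed variant. Two illustrative mappings:

    //   {type=Color2D,        is_depth=0} -> "2D"         {is_depth=1} -> "SHADOW2D"
    //   {type=ColorArrayCube, is_depth=0} -> "ARRAYCUBE"  {is_depth=1} -> "SHADOWARRAYCUBE"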
| 105 | |||
| 106 | std::string Offset(EmitContext& ctx, const IR::Value& offset) { | ||
| 107 | if (offset.IsEmpty()) { | ||
| 108 | return ""; | ||
| 109 | } | ||
| 110 | return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); | ||
| 111 | } | ||
| 112 | |||
| 113 | std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, | ||
| 114 | const IR::Value& offset2) { | ||
| 115 | if (offset2.IsEmpty()) { | ||
| 116 | return {}; | ||
| 117 | } else { | ||
| 118 | return {ctx.reg_alloc, ctx.reg_alloc}; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, | ||
| 123 | const IR::Value& offset2) { | ||
| 124 | const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; | ||
| 125 | const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; | ||
| 126 | // Input swizzle: [XYXY] [XYXY] | ||
| 127 | // Output swizzle: [XXXX] [YYYY] | ||
| 128 | ctx.Add("MOV {}.x,{}.x;" | ||
| 129 | "MOV {}.y,{}.z;" | ||
| 130 | "MOV {}.z,{}.x;" | ||
| 131 | "MOV {}.w,{}.z;" | ||
| 132 | "MOV {}.x,{}.y;" | ||
| 133 | "MOV {}.y,{}.w;" | ||
| 134 | "MOV {}.z,{}.y;" | ||
| 135 | "MOV {}.w,{}.w;", | ||
| 136 | off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, | ||
| 137 | offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); | ||
| 138 | } | ||
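A worked example of the transpose above may help; the component values are illustrative, not emitter output:

    //   offsets_a = (x0, y0, x1, y1)    offsets_b = (x2, y2, x3, y3)
    // after the eight MOVs:
    //   off_x     = (x0, x1, x2, x3)    off_y     = (y0, y1, y2, y3)
    // i.e. TXGO receives all four X offsets in one register and all four Y offsets in the other.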
| 139 | |||
| 140 | std::string GradOffset(const IR::Value& offset) { | ||
| 141 | if (offset.IsImmediate()) { | ||
| 142 | LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); | ||
| 143 | return ""; | ||
| 144 | } | ||
| 145 | IR::Inst* const vector{offset.InstRecursive()}; | ||
| 146 | if (!vector->AreAllArgsImmediates()) { | ||
| 147 | LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); | ||
| 148 | return ""; | ||
| 149 | } | ||
| 150 | switch (vector->NumArgs()) { | ||
| 151 | case 1: | ||
| 152 | return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); | ||
| 153 | case 2: | ||
| 154 | return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), | ||
| 155 | static_cast<s32>(vector->Arg(1).U32())); | ||
| 156 | default: | ||
| 157 | throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); | ||
| 158 | } | ||
| 159 | } | ||
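The warn-and-drop behavior above reflects that the TXD offset has to be known at compile time, so only fully immediate offset vectors can be honored. Illustratively, an immediate two-component offset (1, -2) formats as the operand suffix below; the s32 cast keeps negative texel offsets signed:

    //   ",(1,-2)"   appended to the TXD operand list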
| 160 | |||
| 161 | std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { | ||
| 162 | if (coord.IsImmediate()) { | ||
| 163 | ScopedRegister scoped_reg(ctx.reg_alloc); | ||
| 164 | ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); | ||
| 165 | return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; | ||
| 166 | } | ||
| 167 | std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; | ||
| 168 | if (coord.InstRecursive()->HasUses()) { | ||
| 169 | // The coord vector still has uses, so copy it to the RC scratch register first; in | ||
| 170 | // practice this never happens because vectors are only assembled for immediate texture instructions | ||
| 171 | ctx.Add("MOV.F RC,{};", coord_vec); | ||
| 172 | coord_vec = "RC"; | ||
| 173 | } | ||
| 174 | return {std::move(coord_vec), ScopedRegister{}}; | ||
| 175 | } | ||
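A short summary of the three paths above, as a hedged sketch of intent rather than new behavior:

    // - Immediate coord: materialize it into a freshly allocated register (scoped_reg).
    // - Register coord with remaining uses: copy it to the RC scratch register so the
    //   component writes below (e.g. "MOV.F coord.w,...") cannot clobber a live value.
    // - Otherwise: use the consumed register in place and return an empty guard.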
| 176 | |||
| 177 | void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { | ||
| 178 | if (!sparse_inst) { | ||
| 179 | return; | ||
| 180 | } | ||
| 181 | const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; | ||
| 182 | ctx.Add("MOV.S {},-1;" | ||
| 183 | "MOV.S {}(NONRESIDENT),0;", | ||
| 184 | sparse_ret, sparse_ret); | ||
| 185 | } | ||
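The sparse residency result is encoded with a condition-code write: the register defaults to -1 (resident) and the (NONRESIDENT) predicate overwrites it with 0 when the fetch touched an unmapped page. Illustrative emitted pattern, register name invented:

    //   MOV.S R0,-1;               # assume resident
    //   MOV.S R0(NONRESIDENT),0;   # taken only when the preceding .SPARSE op missed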
| 186 | |||
| 187 | std::string_view FormatStorage(ImageFormat format) { | ||
| 188 | switch (format) { | ||
| 189 | case ImageFormat::Typeless: | ||
| 190 | return "U"; | ||
| 191 | case ImageFormat::R8_UINT: | ||
| 192 | return "U8"; | ||
| 193 | case ImageFormat::R8_SINT: | ||
| 194 | return "S8"; | ||
| 195 | case ImageFormat::R16_UINT: | ||
| 196 | return "U16"; | ||
| 197 | case ImageFormat::R16_SINT: | ||
| 198 | return "S16"; | ||
| 199 | case ImageFormat::R32_UINT: | ||
| 200 | return "U32"; | ||
| 201 | case ImageFormat::R32G32_UINT: | ||
| 202 | return "U32X2"; | ||
| 203 | case ImageFormat::R32G32B32A32_UINT: | ||
| 204 | return "U32X4"; | ||
| 205 | } | ||
| 206 | throw InvalidArgument("Invalid image format {}", format); | ||
| 207 | } | ||
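The returned suffix selects the storage format of the LOADIM/STOREIM/ATOMIM opcodes used below. For example, with illustrative operands:

    //   R32G32B32A32_UINT -> "U32X4", so a read emits: LOADIM.U32X4 R0,R1,image[0],2D;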
| 208 | |||
| 209 | template <typename T> | ||
| 210 | void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, | ||
| 211 | std::string_view op) { | ||
| 212 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 213 | const std::string_view type{TextureType(info)}; | ||
| 214 | const std::string image{Image(ctx, info, index)}; | ||
| 215 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 216 | ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); | ||
| 217 | } | ||
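T is one of the scalar value wrappers (ScalarU32, ScalarS32, ...) and op carries both the GLASM atomic operation and its type suffix. For example, EmitImageAtomicIAdd32 further below instantiates this as:

    //   ImageAtomic(ctx, inst, index, coord, value, "ADD.U32");
    // which emits, illustratively: ATOMIM.ADD.U32 R0,value,coord,image[0],2D;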
| 218 | |||
| 219 | IR::Inst* PrepareSparse(IR::Inst& inst) { | ||
| 220 | const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 221 | if (sparse_inst) { | ||
| 222 | sparse_inst->Invalidate(); | ||
| 223 | } | ||
| 224 | return sparse_inst; | ||
| 225 | } | ||
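PrepareSparse claims the GetSparseFromOp pseudo-instruction (when present) so that this file, rather than the generic pseudo-op handling, defines its value. Every sampler emitter below follows the same call pattern; a minimal sketch:

    IR::Inst* const sparse_inst{PrepareSparse(inst)}; // claim the pseudo-op, or null
    // ... emit TEX/TXB/TXL/TXF with the ".SPARSE" modifier iff sparse_inst != nullptr ...
    StoreSparse(ctx, sparse_inst);                    // materialize the residency flag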
| 226 | } // Anonymous namespace | ||
| 227 | |||
| 228 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 229 | const IR::Value& coord, Register bias_lc, const IR::Value& offset) { | ||
| 230 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 231 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 232 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 233 | const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; | ||
| 234 | const std::string_view type{TextureType(info)}; | ||
| 235 | const std::string texture{Texture(ctx, info, index)}; | ||
| 236 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 237 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 238 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 239 | if (info.has_bias) { | ||
| 240 | if (info.type == TextureType::ColorArrayCube) { | ||
| 241 | ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, | ||
| 242 | bias_lc, texture, offset_vec); | ||
| 243 | } else { | ||
| 244 | if (info.has_lod_clamp) { | ||
| 245 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 246 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 247 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, | ||
| 248 | offset_vec); | ||
| 249 | } else { | ||
| 250 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 251 | "TXB.F{} {},{},{},{}{};", | ||
| 252 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } else { | ||
| 256 | if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { | ||
| 257 | ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, | ||
| 258 | bias_lc, texture, offset_vec); | ||
| 259 | } else { | ||
| 260 | ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, | ||
| 261 | type, offset_vec); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | StoreSparse(ctx, sparse_inst); | ||
| 265 | } | ||
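For the simplest case (a 2D sample with no bias, no LOD clamp, no offset, and no sparse pseudo-op) the code above reduces to a single instruction; register names are invented for illustration:

    //   TEX.F R0,R1,texture[0],2D;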
| 266 | |||
| 267 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 268 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { | ||
| 269 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 270 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 271 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 272 | const std::string_view type{TextureType(info)}; | ||
| 273 | const std::string texture{Texture(ctx, info, index)}; | ||
| 274 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 275 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 276 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 277 | if (info.type == TextureType::ColorArrayCube) { | ||
| 278 | ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, | ||
| 279 | offset_vec); | ||
| 280 | } else { | ||
| 281 | ctx.Add("MOV.F {}.w,{};" | ||
| 282 | "TXL.F{} {},{},{},{}{};", | ||
| 283 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 284 | } | ||
| 285 | StoreSparse(ctx, sparse_inst); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 289 | const IR::Value& coord, const IR::Value& dref, | ||
| 290 | const IR::Value& bias_lc, const IR::Value& offset) { | ||
| 291 | // Allocate the staging register early so it cannot alias registers consumed below | ||
| 292 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 293 | ScopedRegister staging; | ||
| 294 | if (info.type == TextureType::ColorArrayCube) { | ||
| 295 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 296 | } | ||
| 297 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 298 | const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; | ||
| 299 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 300 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 301 | const std::string_view type{TextureType(info)}; | ||
| 302 | const std::string texture{Texture(ctx, info, index)}; | ||
| 303 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 304 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 305 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 306 | if (info.has_bias) { | ||
| 307 | if (info.has_lod_clamp) { | ||
| 308 | switch (info.type) { | ||
| 309 | case TextureType::Color1D: | ||
| 310 | case TextureType::ColorArray1D: | ||
| 311 | case TextureType::Color2D: | ||
| 312 | ctx.Add("MOV.F {}.z,{};" | ||
| 313 | "MOV.F {}.w,{}.x;" | ||
| 314 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 315 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 316 | bias_lc_vec, texture, type, offset_vec); | ||
| 317 | break; | ||
| 318 | case TextureType::ColorArray2D: | ||
| 319 | case TextureType::ColorCube: | ||
| 320 | ctx.Add("MOV.F {}.w,{};" | ||
| 321 | "TXB.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 322 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 323 | offset_vec); | ||
| 324 | break; | ||
| 325 | default: | ||
| 326 | throw NotImplementedException("Invalid type {} with bias and lod clamp", | ||
| 327 | info.type.Value()); | ||
| 328 | } | ||
| 329 | } else { | ||
| 330 | switch (info.type) { | ||
| 331 | case TextureType::Color1D: | ||
| 332 | case TextureType::ColorArray1D: | ||
| 333 | case TextureType::Color2D: | ||
| 334 | ctx.Add("MOV.F {}.z,{};" | ||
| 335 | "MOV.F {}.w,{}.x;" | ||
| 336 | "TXB.F{} {},{},{},{}{};", | ||
| 337 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 338 | texture, type, offset_vec); | ||
| 339 | break; | ||
| 340 | case TextureType::ColorArray2D: | ||
| 341 | case TextureType::ColorCube: | ||
| 342 | ctx.Add("MOV.F {}.w,{};" | ||
| 343 | "TXB.F{} {},{},{},{},{}{};", | ||
| 344 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 345 | offset_vec); | ||
| 346 | break; | ||
| 347 | case TextureType::ColorArrayCube: | ||
| 348 | ctx.Add("MOV.F {}.x,{};" | ||
| 349 | "MOV.F {}.y,{}.x;" | ||
| 350 | "TXB.F{} {},{},{},{},{}{};", | ||
| 351 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 352 | staging.reg, texture, type, offset_vec); | ||
| 353 | break; | ||
| 354 | default: | ||
| 355 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } else { | ||
| 359 | if (info.has_lod_clamp) { | ||
| 360 | if (info.type != TextureType::ColorArrayCube) { | ||
| 361 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 362 | info.type == TextureType::ColorCube}; | ||
| 363 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 364 | ctx.Add("MOV.F {}.{},{};" | ||
| 365 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 366 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, | ||
| 367 | texture, type, offset_vec); | ||
| 368 | } else { | ||
| 369 | ctx.Add("MOV.F {}.x,{};" | ||
| 370 | "MOV.F {}.y,{};" | ||
| 371 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 372 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 373 | staging.reg, texture, type, offset_vec); | ||
| 374 | } | ||
| 375 | } else { | ||
| 376 | if (info.type != TextureType::ColorArrayCube) { | ||
| 377 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 378 | info.type == TextureType::ColorCube}; | ||
| 379 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 380 | ctx.Add("MOV.F {}.{},{};" | ||
| 381 | "TEX.F{} {},{},{},{}{};", | ||
| 382 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, | ||
| 383 | type, offset_vec); | ||
| 384 | } else { | ||
| 385 | ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, | ||
| 386 | type, offset_vec); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | StoreSparse(ctx, sparse_inst); | ||
| 391 | } | ||
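Where the depth reference travels depends on how many coordinate components are free; a summary of the cases above:

    //   Color1D / ColorArray1D / Color2D -> coord.z (a coordinate component is free)
    //   ColorArray2D / ColorCube         -> coord.w
    //   ColorArrayCube                   -> the staging register, or an extra operand in
    //                                       the no-bias, no-clamp case (coord is fully used)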
| 392 | |||
| 393 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 394 | const IR::Value& coord, const IR::Value& dref, | ||
| 395 | const IR::Value& lod, const IR::Value& offset) { | ||
| 396 | // Allocate the staging register early so it cannot alias registers consumed below | ||
| 397 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 398 | ScopedRegister staging; | ||
| 399 | if (info.type == TextureType::ColorArrayCube) { | ||
| 400 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 401 | } | ||
| 402 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 403 | const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; | ||
| 404 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 405 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 406 | const std::string_view type{TextureType(info)}; | ||
| 407 | const std::string texture{Texture(ctx, info, index)}; | ||
| 408 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 409 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 410 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 411 | switch (info.type) { | ||
| 412 | case TextureType::Color1D: | ||
| 413 | case TextureType::ColorArray1D: | ||
| 414 | case TextureType::Color2D: | ||
| 415 | ctx.Add("MOV.F {}.z,{};" | ||
| 416 | "MOV.F {}.w,{};" | ||
| 417 | "TXL.F{} {},{},{},{}{};", | ||
| 418 | coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, | ||
| 419 | offset_vec); | ||
| 420 | break; | ||
| 421 | case TextureType::ColorArray2D: | ||
| 422 | case TextureType::ColorCube: | ||
| 423 | ctx.Add("MOV.F {}.w,{};" | ||
| 424 | "TXL.F{} {},{},{},{},{}{};", | ||
| 425 | coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, | ||
| 426 | offset_vec); | ||
| 427 | break; | ||
| 428 | case TextureType::ColorArrayCube: | ||
| 429 | ctx.Add("MOV.F {}.x,{};" | ||
| 430 | "MOV.F {}.y,{};" | ||
| 431 | "TXL.F{} {},{},{},{},{}{};", | ||
| 432 | staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, | ||
| 433 | staging.reg, texture, type, offset_vec); | ||
| 434 | break; | ||
| 435 | default: | ||
| 436 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 437 | } | ||
| 438 | StoreSparse(ctx, sparse_inst); | ||
| 439 | } | ||
| 440 | |||
| 441 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 442 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { | ||
| 443 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 444 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 445 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 446 | const char comp{"xyzw"[info.gather_component]}; | ||
| 447 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 448 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 449 | const std::string_view type{TextureType(info)}; | ||
| 450 | const std::string texture{Texture(ctx, info, index)}; | ||
| 451 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 452 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 453 | if (offset2.IsEmpty()) { | ||
| 454 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 455 | ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, | ||
| 456 | offset_vec); | ||
| 457 | } else { | ||
| 458 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 459 | ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, | ||
| 460 | texture, comp, type); | ||
| 461 | } | ||
| 462 | StoreSparse(ctx, sparse_inst); | ||
| 463 | } | ||
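A single offset folds into the offset() modifier of TXG, while a second offset pair forces the TXGO form, which takes the transposed per-texel offsets as explicit operands. Illustrative outputs for a 2D gather of component x, register names invented:

    //   one offset:  TXG.F R0,R1,texture[0].x,2D,offset(R2);
    //   two offsets: TXGO.F R0,R1,R2,R3,texture[0].x,2D;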
| 464 | |||
| 465 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 466 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 467 | const IR::Value& dref) { | ||
| 468 | // FIXME: This instruction is not working as expected | ||
| 469 | |||
| 470 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 471 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 472 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 473 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 474 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 475 | const std::string_view type{TextureType(info)}; | ||
| 476 | const std::string texture{Texture(ctx, info, index)}; | ||
| 477 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 478 | const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; | ||
| 479 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 480 | std::string args; | ||
| 481 | switch (info.type) { | ||
| 482 | case TextureType::Color2D: | ||
| 483 | ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); | ||
| 484 | args = fmt::to_string(coord_vec); | ||
| 485 | break; | ||
| 486 | case TextureType::ColorArray2D: | ||
| 487 | case TextureType::ColorCube: | ||
| 488 | ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); | ||
| 489 | args = fmt::to_string(coord_vec); | ||
| 490 | break; | ||
| 491 | case TextureType::ColorArrayCube: | ||
| 492 | args = fmt::format("{},{}", coord_vec, dref_value); | ||
| 493 | break; | ||
| 494 | default: | ||
| 495 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 496 | } | ||
| 497 | if (offset2.IsEmpty()) { | ||
| 498 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 499 | ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); | ||
| 500 | } else { | ||
| 501 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 502 | ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, | ||
| 503 | type); | ||
| 504 | } | ||
| 505 | StoreSparse(ctx, sparse_inst); | ||
| 506 | } | ||
| 507 | |||
| 508 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 509 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { | ||
| 510 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 511 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 512 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 513 | const std::string_view type{TextureType(info)}; | ||
| 514 | const std::string texture{Texture(ctx, info, index)}; | ||
| 515 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 516 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 517 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 518 | if (info.type == TextureType::Buffer) { | ||
| 519 | ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 520 | } else if (ms.type != Type::Void) { | ||
| 521 | ctx.Add("MOV.S {}.w,{};" | ||
| 522 | "TXFMS.F{} {},{},{},{}{};", | ||
| 523 | coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 524 | } else { | ||
| 525 | ctx.Add("MOV.S {}.w,{};" | ||
| 526 | "TXF.F{} {},{},{},{}{};", | ||
| 527 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 528 | } | ||
| 529 | StoreSparse(ctx, sparse_inst); | ||
| 530 | } | ||
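The three branches above correspond to the three GLASM fetch forms; illustrative outputs with invented register names:

    //   buffer:       TXF.F R0,R1,texture[0],BUFFER;
    //   multisample:  MOV.S R1.w,ms;  TXFMS.F R0,R1,texture[0],2D;
    //   otherwise:    MOV.S R1.w,lod; TXF.F R0,R1,texture[0],2D;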
| 531 | |||
| 532 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | ScalarS32 lod) { | ||
| 534 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 535 | const std::string texture{Texture(ctx, info, index)}; | ||
| 536 | const std::string_view type{TextureType(info)}; | ||
| 537 | ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); | ||
| 538 | } | ||
| 539 | |||
| 540 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 541 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 542 | const std::string texture{Texture(ctx, info, index)}; | ||
| 543 | const std::string_view type{TextureType(info)}; | ||
| 544 | ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); | ||
| 545 | } | ||
| 546 | |||
| 547 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 549 | const IR::Value& offset, const IR::Value& lod_clamp) { | ||
| 550 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 551 | ScopedRegister dpdx, dpdy; | ||
| 552 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||
| 553 | if (multi_component) { | ||
| 554 | // Allocate this early to avoid aliasing other registers | ||
| 555 | dpdx = ScopedRegister{ctx.reg_alloc}; | ||
| 556 | dpdy = ScopedRegister{ctx.reg_alloc}; | ||
| 557 | } | ||
| 558 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 559 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 560 | const std::string_view type{TextureType(info)}; | ||
| 561 | const std::string texture{Texture(ctx, info, index)}; | ||
| 562 | const std::string offset_vec{GradOffset(offset)}; | ||
| 563 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 564 | const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; | ||
| 565 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 566 | if (multi_component) { | ||
| 567 | ctx.Add("MOV.F {}.x,{}.x;" | ||
| 568 | "MOV.F {}.y,{}.z;" | ||
| 569 | "MOV.F {}.x,{}.y;" | ||
| 570 | "MOV.F {}.y,{}.w;", | ||
| 571 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | ||
| 572 | dpdy.reg, derivatives_vec); | ||
| 573 | if (info.has_lod_clamp) { | ||
| 574 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | ||
| 575 | ctx.Add("MOV.F {}.w,{};" | ||
| 576 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | ||
| 577 | dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 578 | texture, type, offset_vec); | ||
| 579 | } else { | ||
| 580 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 581 | texture, type, offset_vec); | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | ||
| 585 | derivatives_vec, texture, type, offset_vec); | ||
| 586 | } | ||
| 587 | StoreSparse(ctx, sparse_inst); | ||
| 588 | } | ||
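The derivatives arrive interleaved by axis, so the multi-component path first de-interleaves them into dpdx/dpdy, with the optional LOD clamp riding in dpdy.w. Illustratively:

    //   derivatives_vec = (dx.x, dy.x, dx.y, dy.y)
    //   dpdx = (dx.x, dx.y)    dpdy = (dy.x, dy.y)    lod clamp -> dpdy.w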
| 589 | |||
| 590 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 591 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 592 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 593 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 594 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 595 | const std::string_view type{TextureType(info)}; | ||
| 596 | const std::string image{Image(ctx, info, index)}; | ||
| 597 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 598 | ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); | ||
| 599 | StoreSparse(ctx, sparse_inst); | ||
| 600 | } | ||
| 601 | |||
| 602 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 603 | Register color) { | ||
| 604 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 605 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 606 | const std::string_view type{TextureType(info)}; | ||
| 607 | const std::string image{Image(ctx, info, index)}; | ||
| 608 | ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); | ||
| 609 | } | ||
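Illustrative output for an R32_UINT store to a 2D image, operand registers invented:

    //   STOREIM.U32 image[0],R0,R1,2D;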
| 610 | |||
| 611 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 612 | ScalarU32 value) { | ||
| 613 | ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); | ||
| 614 | } | ||
| 615 | |||
| 616 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 617 | ScalarS32 value) { | ||
| 618 | ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); | ||
| 619 | } | ||
| 620 | |||
| 621 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 622 | ScalarU32 value) { | ||
| 623 | ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); | ||
| 624 | } | ||
| 625 | |||
| 626 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 627 | ScalarS32 value) { | ||
| 628 | ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); | ||
| 629 | } | ||
| 630 | |||
| 631 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 632 | ScalarU32 value) { | ||
| 633 | ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); | ||
| 634 | } | ||
| 635 | |||
| 636 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 637 | ScalarU32 value) { | ||
| 638 | ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); | ||
| 639 | } | ||
| 640 | |||
| 641 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 642 | ScalarU32 value) { | ||
| 643 | ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); | ||
| 644 | } | ||
| 645 | |||
| 646 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 647 | ScalarU32 value) { | ||
| 648 | ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); | ||
| 649 | } | ||
| 650 | |||
| 651 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 652 | ScalarU32 value) { | ||
| 653 | ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); | ||
| 654 | } | ||
| 655 | |||
| 656 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 657 | ScalarU32 value) { | ||
| 658 | ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); | ||
| 659 | } | ||
| 660 | |||
| 661 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 662 | Register coord, ScalarU32 value) { | ||
| 663 | ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); | ||
| 664 | } | ||
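All eleven wrappers above funnel into ImageAtomic; the full IR-to-GLASM mapping, read directly off the calls:

    //   IAdd -> ADD.U32    SMin -> MIN.S32    UMin -> MIN.U32    SMax -> MAX.S32
    //   UMax -> MAX.U32    Inc  -> IWRAP.U32  Dec  -> DWRAP.U32  And  -> AND.U32
    //   Or   -> OR.U32     Xor  -> XOR.U32    Exchange -> EXCH.U32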
| 665 | |||
| 666 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 667 | throw LogicError("Unreachable instruction"); | ||
| 668 | } | ||
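This and the remaining Bindless*/Bound* stubs are deliberate: earlier IR passes are expected to rewrite bindless and bound image accesses into the descriptor-indexed forms emitted above (an assumption about the frontend's texture lowering, not something this file enforces), so reaching any of them indicates a compiler bug rather than a missing feature.

    //   Every stub below shares this single-throw body; none of them is reachable
    //   from correctly lowered IR.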
| 669 | |||
| 670 | void EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 671 | throw LogicError("Unreachable instruction"); | ||
| 672 | } | ||
| 673 | |||
| 674 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 675 | throw LogicError("Unreachable instruction"); | ||
| 676 | } | ||
| 677 | |||
| 678 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 679 | throw LogicError("Unreachable instruction"); | ||
| 680 | } | ||
| 681 | |||
| 682 | void EmitBindlessImageGather(EmitContext&) { | ||
| 683 | throw LogicError("Unreachable instruction"); | ||
| 684 | } | ||
| 685 | |||
| 686 | void EmitBindlessImageGatherDref(EmitContext&) { | ||
| 687 | throw LogicError("Unreachable instruction"); | ||
| 688 | } | ||
| 689 | |||
| 690 | void EmitBindlessImageFetch(EmitContext&) { | ||
| 691 | throw LogicError("Unreachable instruction"); | ||
| 692 | } | ||
| 693 | |||
| 694 | void EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 695 | throw LogicError("Unreachable instruction"); | ||
| 696 | } | ||
| 697 | |||
| 698 | void EmitBindlessImageQueryLod(EmitContext&) { | ||
| 699 | throw LogicError("Unreachable instruction"); | ||
| 700 | } | ||
| 701 | |||
| 702 | void EmitBindlessImageGradient(EmitContext&) { | ||
| 703 | throw LogicError("Unreachable instruction"); | ||
| 704 | } | ||
| 705 | |||
| 706 | void EmitBindlessImageRead(EmitContext&) { | ||
| 707 | throw LogicError("Unreachable instruction"); | ||
| 708 | } | ||
| 709 | |||
| 710 | void EmitBindlessImageWrite(EmitContext&) { | ||
| 711 | throw LogicError("Unreachable instruction"); | ||
| 712 | } | ||
| 713 | |||
| 714 | void EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 715 | throw LogicError("Unreachable instruction"); | ||
| 716 | } | ||
| 717 | |||
| 718 | void EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 719 | throw LogicError("Unreachable instruction"); | ||
| 720 | } | ||
| 721 | |||
| 722 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 723 | throw LogicError("Unreachable instruction"); | ||
| 724 | } | ||
| 725 | |||
| 726 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 727 | throw LogicError("Unreachable instruction"); | ||
| 728 | } | ||
| 729 | |||
| 730 | void EmitBoundImageGather(EmitContext&) { | ||
| 731 | throw LogicError("Unreachable instruction"); | ||
| 732 | } | ||
| 733 | |||
| 734 | void EmitBoundImageGatherDref(EmitContext&) { | ||
| 735 | throw LogicError("Unreachable instruction"); | ||
| 736 | } | ||
| 737 | |||
| 738 | void EmitBoundImageFetch(EmitContext&) { | ||
| 739 | throw LogicError("Unreachable instruction"); | ||
| 740 | } | ||
| 741 | |||
| 742 | void EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 743 | throw LogicError("Unreachable instruction"); | ||
| 744 | } | ||
| 745 | |||
| 746 | void EmitBoundImageQueryLod(EmitContext&) { | ||
| 747 | throw LogicError("Unreachable instruction"); | ||
| 748 | } | ||
| 749 | |||
| 750 | void EmitBoundImageGradient(EmitContext&) { | ||
| 751 | throw LogicError("Unreachable instruction"); | ||
| 752 | } | ||
| 753 | |||
| 754 | void EmitBoundImageRead(EmitContext&) { | ||
| 755 | throw LogicError("Unreachable instruction"); | ||
| 756 | } | ||
| 757 | |||
| 758 | void EmitBoundImageWrite(EmitContext&) { | ||
| 759 | throw LogicError("Unreachable instruction"); | ||
| 760 | } | ||
| 761 | |||
| 762 | void EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 763 | throw LogicError("Unreachable instruction"); | ||
| 764 | } | ||
| 765 | |||
| 766 | void EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 767 | throw LogicError("Unreachable instruction"); | ||
| 768 | } | ||
| 769 | |||
| 770 | void EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 771 | throw LogicError("Unreachable instruction"); | ||
| 772 | } | ||
| 773 | |||
| 774 | void EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 775 | throw LogicError("Unreachable instruction"); | ||
| 776 | } | ||
| 777 | |||
| 778 | void EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 779 | throw LogicError("Unreachable instruction"); | ||
| 780 | } | ||
| 781 | |||
| 782 | void EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 783 | throw LogicError("Unreachable instruction"); | ||
| 784 | } | ||
| 785 | |||
| 786 | void EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 787 | throw LogicError("Unreachable instruction"); | ||
| 788 | } | ||
| 789 | |||
| 790 | void EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 791 | throw LogicError("Unreachable instruction"); | ||
| 792 | } | ||
| 793 | |||
| 794 | void EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 795 | throw LogicError("Unreachable instruction"); | ||
| 796 | } | ||
| 797 | |||
| 798 | void EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 799 | throw LogicError("Unreachable instruction"); | ||
| 800 | } | ||
| 801 | |||
| 802 | void EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 803 | throw LogicError("Unreachable instruction"); | ||
| 804 | } | ||
| 805 | |||
| 806 | void EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 807 | throw LogicError("Unreachable instruction"); | ||
| 808 | } | ||
| 809 | |||
| 810 | void EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 811 | throw LogicError("Unreachable instruction"); | ||
| 812 | } | ||
| 813 | |||
| 814 | void EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 815 | throw LogicError("Unreachable instruction"); | ||
| 816 | } | ||
| 817 | |||
| 818 | void EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 819 | throw LogicError("Unreachable instruction"); | ||
| 820 | } | ||
| 821 | |||
| 822 | void EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 823 | throw LogicError("Unreachable instruction"); | ||
| 824 | } | ||
| 825 | |||
| 826 | void EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 827 | throw LogicError("Unreachable instruction"); | ||
| 828 | } | ||
| 829 | |||
| 830 | void EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 831 | throw LogicError("Unreachable instruction"); | ||
| 832 | } | ||
| 833 | |||
| 834 | void EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 835 | throw LogicError("Unreachable instruction"); | ||
| 836 | } | ||
| 837 | |||
| 838 | void EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 839 | throw LogicError("Unreachable instruction"); | ||
| 840 | } | ||
| 841 | |||
| 842 | void EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 843 | throw LogicError("Unreachable instruction"); | ||
| 844 | } | ||
| 845 | |||
| 846 | void EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 847 | throw LogicError("Unreachable instruction"); | ||
| 848 | } | ||
| 849 | |||
| 850 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | |||
| @@ -0,0 +1,625 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | enum class Attribute : u64; | ||
| 12 | enum class Patch : u64; | ||
| 13 | class Inst; | ||
| 14 | class Value; | ||
| 15 | } // namespace Shader::IR | ||
| 16 | |||
| 17 | namespace Shader::Backend::GLASM { | ||
| 18 | |||
| 19 | class EmitContext; | ||
| 20 | |||
| 21 | // Microinstruction emitters | ||
| 22 | void EmitPhi(EmitContext& ctx, IR::Inst& inst); | ||
| 23 | void EmitVoid(EmitContext& ctx); | ||
| 24 | void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 25 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 26 | void EmitReference(EmitContext&, const IR::Value& value); | ||
| 27 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); | ||
| 28 | void EmitJoin(EmitContext& ctx); | ||
| 29 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 30 | void EmitBarrier(EmitContext& ctx); | ||
| 31 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 32 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 33 | void EmitPrologue(EmitContext& ctx); | ||
| 34 | void EmitEpilogue(EmitContext& ctx); | ||
| 35 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); | ||
| 36 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 37 | void EmitGetRegister(EmitContext& ctx); | ||
| 38 | void EmitSetRegister(EmitContext& ctx); | ||
| 39 | void EmitGetPred(EmitContext& ctx); | ||
| 40 | void EmitSetPred(EmitContext& ctx); | ||
| 41 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 42 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 43 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 44 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 46 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 47 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 48 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 49 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 50 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 51 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 52 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); | ||
| 53 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); | ||
| 54 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); | ||
| 55 | void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); | ||
| 56 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); | ||
| 57 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); | ||
| 58 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); | ||
| 59 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); | ||
| 60 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); | ||
| 61 | void EmitGetZFlag(EmitContext& ctx); | ||
| 62 | void EmitGetSFlag(EmitContext& ctx); | ||
| 63 | void EmitGetCFlag(EmitContext& ctx); | ||
| 64 | void EmitGetOFlag(EmitContext& ctx); | ||
| 65 | void EmitSetZFlag(EmitContext& ctx); | ||
| 66 | void EmitSetSFlag(EmitContext& ctx); | ||
| 67 | void EmitSetCFlag(EmitContext& ctx); | ||
| 68 | void EmitSetOFlag(EmitContext& ctx); | ||
| 69 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); | ||
| 70 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 71 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 72 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | ||
| 73 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | ||
| 74 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | ||
| 75 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); | ||
| 76 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); | ||
| 77 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | ||
| 78 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); | ||
| 79 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); | ||
| 80 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); | ||
| 81 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); | ||
| 82 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 83 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 84 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 85 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 86 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 87 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 88 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 89 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); | ||
| 90 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); | ||
| 91 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); | ||
| 92 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); | ||
| 93 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); | ||
| 94 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); | ||
| 95 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); | ||
| 96 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 97 | ScalarU32 offset); | ||
| 98 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 99 | ScalarU32 offset); | ||
| 100 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 101 | ScalarU32 offset); | ||
| 102 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 103 | ScalarU32 offset); | ||
| 104 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 105 | ScalarU32 offset); | ||
| 106 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 107 | ScalarU32 offset); | ||
| 108 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | ScalarU32 offset); | ||
| 110 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 111 | ScalarU32 value); | ||
| 112 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 113 | ScalarS32 value); | ||
| 114 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 115 | ScalarU32 value); | ||
| 116 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 117 | ScalarS32 value); | ||
| 118 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 119 | ScalarU32 value); | ||
| 120 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 121 | Register value); | ||
| 122 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 123 | Register value); | ||
| 124 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 125 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 126 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 127 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 128 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 129 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 130 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 131 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 132 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 133 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 134 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 135 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 136 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 137 | const IR::Value& e2); | ||
| 138 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 139 | const IR::Value& e2, const IR::Value& e3); | ||
| 140 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 141 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 142 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 143 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 144 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 145 | void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 146 | void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 147 | void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 148 | void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); | ||
| 149 | void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); | ||
| 150 | void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, | ||
| 151 | Register e4); | ||
| 152 | void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); | ||
| 153 | void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); | ||
| 154 | void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); | ||
| 155 | void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 156 | void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 157 | void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 158 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 159 | const IR::Value& e2); | ||
| 160 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 161 | const IR::Value& e2, const IR::Value& e3); | ||
| 162 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 163 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 164 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 165 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 166 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 167 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 168 | ScalarF32 object, u32 index); | ||
| 169 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 170 | ScalarF32 object, u32 index); | ||
| 171 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 172 | ScalarF32 object, u32 index); | ||
| 173 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 174 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 175 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 176 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 177 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 178 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 179 | void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 180 | void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 181 | void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 182 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 183 | ScalarS32 false_value); | ||
| 184 | void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 185 | void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 186 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 187 | ScalarS32 false_value); | ||
| 188 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 189 | Register false_value); | ||
| 190 | void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 191 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 192 | ScalarS32 false_value); | ||
| 193 | void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 194 | void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 195 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 196 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 197 | void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 198 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 199 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 200 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 201 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 202 | void EmitPackFloat2x16(EmitContext& ctx, Register value); | ||
| 203 | void EmitUnpackFloat2x16(EmitContext& ctx, Register value); | ||
| 204 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 205 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 206 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 207 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 208 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 209 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 210 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 211 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 212 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 213 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 214 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 215 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 216 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 217 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 218 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 219 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 220 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); | ||
| 221 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); | ||
| 222 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); | ||
| 223 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 224 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 225 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 226 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 227 | void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 228 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 229 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 230 | void EmitFPNeg16(EmitContext& ctx, Register value); | ||
| 231 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); | ||
| 232 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 233 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 234 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 235 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 236 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 237 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 238 | void EmitFPRecip64(EmitContext& ctx, Register value); | ||
| 239 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 240 | void EmitFPRecipSqrt64(EmitContext& ctx, Register value); | ||
| 241 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 242 | void EmitFPSaturate16(EmitContext& ctx, Register value); | ||
| 243 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 244 | void EmitFPSaturate64(EmitContext& ctx, Register value); | ||
| 245 | void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); | ||
| 246 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 247 | ScalarF32 max_value); | ||
| 248 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 249 | ScalarF64 max_value); | ||
| 250 | void EmitFPRoundEven16(EmitContext& ctx, Register value); | ||
| 251 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 252 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 253 | void EmitFPFloor16(EmitContext& ctx, Register value); | ||
| 254 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 255 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 256 | void EmitFPCeil16(EmitContext& ctx, Register value); | ||
| 257 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 258 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 259 | void EmitFPTrunc16(EmitContext& ctx, Register value); | ||
| 260 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 261 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 262 | void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 263 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 264 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 265 | void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 266 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 267 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 268 | void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 269 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 270 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 271 | void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 272 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 273 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 274 | void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 275 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 276 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 277 | void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 278 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 279 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 280 | void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 281 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 282 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 283 | void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 284 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 285 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 286 | void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 287 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 288 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 289 | void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 290 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 291 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 292 | void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 293 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 294 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 295 | void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 296 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 297 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 298 | void EmitFPIsNan16(EmitContext& ctx, Register value); | ||
| 299 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 300 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 301 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 302 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 303 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 304 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 305 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 306 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 307 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 308 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 309 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 310 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); | ||
| 311 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 312 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 313 | ScalarU32 shift); | ||
| 314 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); | ||
| 315 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 316 | ScalarS32 shift); | ||
| 317 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 318 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 319 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 320 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 321 | ScalarS32 offset, ScalarS32 count); | ||
| 322 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 323 | ScalarS32 count); | ||
| 324 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 325 | ScalarU32 count); | ||
| 326 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 327 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 328 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 329 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 330 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 331 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 332 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 333 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 334 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 335 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); | ||
| 336 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); | ||
| 337 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 338 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 339 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 340 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 341 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 342 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 343 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 344 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 345 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 346 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 347 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 348 | ScalarU32 value); | ||
| 349 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 350 | ScalarS32 value); | ||
| 351 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 352 | ScalarU32 value); | ||
| 353 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 354 | ScalarS32 value); | ||
| 355 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 356 | ScalarU32 value); | ||
| 357 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 358 | ScalarU32 value); | ||
| 359 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 360 | ScalarU32 value); | ||
| 361 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 362 | ScalarU32 value); | ||
| 363 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 364 | ScalarU32 value); | ||
| 365 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 366 | ScalarU32 value); | ||
| 367 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 368 | ScalarU32 value); | ||
| 369 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 370 | Register value); | ||
| 371 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 372 | ScalarU32 offset, ScalarU32 value); | ||
| 373 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 374 | ScalarU32 offset, ScalarS32 value); | ||
| 375 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 376 | ScalarU32 offset, ScalarU32 value); | ||
| 377 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 378 | ScalarU32 offset, ScalarS32 value); | ||
| 379 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, ScalarU32 value); | ||
| 381 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 382 | ScalarU32 offset, ScalarU32 value); | ||
| 383 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 384 | ScalarU32 offset, ScalarU32 value); | ||
| 385 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 386 | ScalarU32 offset, ScalarU32 value); | ||
| 387 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 388 | ScalarU32 offset, ScalarU32 value); | ||
| 389 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, ScalarU32 value); | ||
| 391 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 392 | ScalarU32 offset, ScalarU32 value); | ||
| 393 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 394 | ScalarU32 offset, Register value); | ||
| 395 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 396 | ScalarU32 offset, Register value); | ||
| 397 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 398 | ScalarU32 offset, Register value); | ||
| 399 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value); | ||
| 401 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 402 | ScalarU32 offset, Register value); | ||
| 403 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 404 | ScalarU32 offset, Register value); | ||
| 405 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 406 | ScalarU32 offset, Register value); | ||
| 407 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 408 | ScalarU32 offset, Register value); | ||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value); | ||
| 411 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 412 | ScalarU32 offset, ScalarF32 value); | ||
| 413 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 414 | ScalarU32 offset, Register value); | ||
| 415 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 416 | ScalarU32 offset, Register value); | ||
| 417 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 418 | ScalarU32 offset, Register value); | ||
| 419 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value); | ||
| 421 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 422 | ScalarU32 offset, Register value); | ||
| 423 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 424 | ScalarU32 offset, Register value); | ||
| 425 | void EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 426 | void EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 427 | void EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 428 | void EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 429 | void EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 430 | void EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 431 | void EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 432 | void EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 433 | void EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 434 | void EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 435 | void EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 436 | void EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 437 | void EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 438 | void EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 439 | void EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 440 | void EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 441 | void EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 442 | void EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 443 | void EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 444 | void EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 445 | void EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 446 | void EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 447 | void EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 448 | void EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 449 | void EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 450 | void EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 451 | void EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 452 | void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 453 | void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 454 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 455 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 456 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 457 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 458 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 459 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 460 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 461 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 462 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 463 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 464 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 465 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 466 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 467 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 468 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 469 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 470 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 471 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 472 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 473 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 474 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 475 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 476 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 477 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 478 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 479 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 480 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 481 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 482 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 483 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 484 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 485 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 486 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 487 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 488 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 489 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 490 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 491 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 492 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 493 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 494 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 495 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 496 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 497 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 498 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 499 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 500 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 501 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 502 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 503 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 504 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 505 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 506 | void EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 507 | void EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 508 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 509 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 510 | void EmitBindlessImageGather(EmitContext&); | ||
| 511 | void EmitBindlessImageGatherDref(EmitContext&); | ||
| 512 | void EmitBindlessImageFetch(EmitContext&); | ||
| 513 | void EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 514 | void EmitBindlessImageQueryLod(EmitContext&); | ||
| 515 | void EmitBindlessImageGradient(EmitContext&); | ||
| 516 | void EmitBindlessImageRead(EmitContext&); | ||
| 517 | void EmitBindlessImageWrite(EmitContext&); | ||
| 518 | void EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 519 | void EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 520 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 521 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 522 | void EmitBoundImageGather(EmitContext&); | ||
| 523 | void EmitBoundImageGatherDref(EmitContext&); | ||
| 524 | void EmitBoundImageFetch(EmitContext&); | ||
| 525 | void EmitBoundImageQueryDimensions(EmitContext&); | ||
| 526 | void EmitBoundImageQueryLod(EmitContext&); | ||
| 527 | void EmitBoundImageGradient(EmitContext&); | ||
| 528 | void EmitBoundImageRead(EmitContext&); | ||
| 529 | void EmitBoundImageWrite(EmitContext&); | ||
| 530 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 531 | const IR::Value& coord, Register bias_lc, const IR::Value& offset); | ||
| 532 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); | ||
| 534 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 535 | const IR::Value& coord, const IR::Value& dref, | ||
| 536 | const IR::Value& bias_lc, const IR::Value& offset); | ||
| 537 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 538 | const IR::Value& coord, const IR::Value& dref, | ||
| 539 | const IR::Value& lod, const IR::Value& offset); | ||
| 540 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 541 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); | ||
| 542 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 543 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 544 | const IR::Value& dref); | ||
| 545 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 546 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); | ||
| 547 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | ScalarS32 lod); | ||
| 549 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 550 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 551 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 552 | const IR::Value& offset, const IR::Value& lod_clamp); | ||
| 553 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 554 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 555 | Register color); | ||
| 556 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 557 | void EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 558 | void EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 559 | void EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 560 | void EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 561 | void EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 562 | void EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 563 | void EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 564 | void EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 565 | void EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 566 | void EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 567 | void EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 568 | void EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 569 | void EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 570 | void EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 571 | void EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 572 | void EmitBoundImageAtomicInc32(EmitContext&); | ||
| 573 | void EmitBoundImageAtomicDec32(EmitContext&); | ||
| 574 | void EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 575 | void EmitBoundImageAtomicOr32(EmitContext&); | ||
| 576 | void EmitBoundImageAtomicXor32(EmitContext&); | ||
| 577 | void EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 578 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 579 | ScalarU32 value); | ||
| 580 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 581 | ScalarS32 value); | ||
| 582 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 583 | ScalarU32 value); | ||
| 584 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 585 | ScalarS32 value); | ||
| 586 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 587 | ScalarU32 value); | ||
| 588 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 589 | ScalarU32 value); | ||
| 590 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 591 | ScalarU32 value); | ||
| 592 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 593 | ScalarU32 value); | ||
| 594 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 595 | ScalarU32 value); | ||
| 596 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 597 | ScalarU32 value); | ||
| 598 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 599 | Register coord, ScalarU32 value); | ||
| 600 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst); | ||
| 601 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 602 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 603 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 604 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 605 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); | ||
| 606 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 607 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 608 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 609 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 610 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 611 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 612 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 613 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 614 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 615 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 616 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 617 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 618 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 619 | ScalarU32 swizzle); | ||
| 620 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 621 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 622 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 623 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 624 | |||
| 625 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, | ||
| 12 | std::string_view lop) { | ||
| 13 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 14 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 15 | if (zero) { | ||
| 16 | zero->Invalidate(); | ||
| 17 | } | ||
| 18 | if (sign) { | ||
| 19 | sign->Invalidate(); | ||
| 20 | } | ||
| 21 | if (zero || sign) { | ||
| 22 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 23 | } | ||
| 24 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 25 | ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); | ||
| 26 | if (zero) { | ||
| 27 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 28 | } | ||
| 29 | if (sign) { | ||
| 30 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
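As a hedged sketch (register names R0-R4 are hypothetical; the text follows the format strings above by direct substitution), a bitwise AND whose zero and sign pseudo-ops are both consumed expands to roughly:

    AND.S R0.x,R1,R2;
    SEQ.S R3,R0,0;   # zero flag: -1 if result == 0, else 0
    SLT.S R4,R0,0;   # sign flag: -1 if result < 0, else 0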
| 34 | |||
| 35 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 36 | const std::array flags{ | ||
| 37 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), | ||
| 38 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), | ||
| 39 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), | ||
| 40 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), | ||
| 41 | }; | ||
| 42 | for (IR::Inst* const flag_inst : flags) { | ||
| 43 | if (flag_inst) { | ||
| 44 | flag_inst->Invalidate(); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | const bool cc{inst.HasAssociatedPseudoOperation()}; | ||
| 48 | const std::string_view cc_mod{cc ? ".CC" : ""}; | ||
| 49 | if (cc) { | ||
| 50 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 51 | } | ||
| 52 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 53 | ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); | ||
| 54 | if (!cc) { | ||
| 55 | return; | ||
| 56 | } | ||
| 57 | static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; | ||
| 58 | for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { | ||
| 59 | if (!flags[flag_index]) { | ||
| 60 | continue; | ||
| 61 | } | ||
| 62 | const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; | ||
| 63 | if (flag_index == 0) { | ||
| 64 | ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); | ||
| 65 | } else { | ||
| 66 | // We could use conditional execution here, but it's broken on Nvidia's compiler | ||
| 67 | ctx.Add("IF {}.x;" | ||
| 68 | "MOV.S {}.x,-1;" | ||
| 69 | "ELSE;" | ||
| 70 | "MOV.S {}.x,0;" | ||
| 71 | "ENDIF;", | ||
| 72 | masks[flag_index], flag_ret, flag_ret); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | } | ||
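A hedged illustration of the flag materialization above (hypothetical registers; substitutions follow the format strings verbatim): an IAdd32 whose zero and carry flags are both read emits roughly

    ADD.S.CC R0.x,R1,R2;
    SEQ.S R3.x,R0.x,0;                                   # zero: compare the result directly
    IF CF.x;MOV.S R4.x,-1;ELSE;MOV.S R4.x,0;ENDIF;       # carry: branch on the condition code

The IF/ELSE pair converts a condition code into a -1/0 register value without conditional-destination writes, sidestepping the Nvidia compiler issue noted in the comment. Note that index 0 of the masks array is intentionally unused: the zero flag is recomputed from the result instead of read from a condition code.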
| 76 | |||
| 77 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 78 | ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 82 | ctx.Add("SUB.S {}.x,{},{};", inst, a, b); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 86 | ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 90 | ctx.Add("MUL.S {}.x,{},{};", inst, a, b); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 94 | if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { | ||
| 95 | ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); | ||
| 96 | } else { | ||
| 97 | ctx.Add("MOV.S {},-{};", inst, value); | ||
| 98 | } | ||
| 99 | } | ||
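Why the immediate is folded here: printing "-{}" around an already-negative immediate would produce text like "MOV.S R0,--5;", which the GLASM assembler is not expected to accept, so negative immediates are negated up front and emitted as plain positive literals. (A hypothetical caveat, not addressed by this code: negating INT_MIN this way is undefined behavior in C++.)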
| 100 | |||
| 101 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 102 | ctx.LongAdd("MOV.S64 {},-{};", inst, value); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 106 | ctx.Add("ABS.S {},{};", inst, value); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 110 | ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 114 | ScalarU32 shift) { | ||
| 115 | ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 119 | ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 123 | ScalarU32 shift) { | ||
| 124 | ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { | ||
| 128 | ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 132 | ScalarS32 shift) { | ||
| 133 | ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 137 | BitwiseLogicalOp(ctx, inst, a, b, "AND"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 141 | BitwiseLogicalOp(ctx, inst, a, b, "OR"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 145 | BitwiseLogicalOp(ctx, inst, a, b, "XOR"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 149 | ScalarS32 offset, ScalarS32 count) { | ||
| 150 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 151 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 152 | ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); | ||
| 153 | } else { | ||
| 154 | ctx.Add("MOV.S RC.x,{};" | ||
| 155 | "MOV.S RC.y,{};" | ||
| 156 | "BFI.S {},RC,{},{};", | ||
| 157 | count, offset, ret, insert, base); | ||
| 158 | } | ||
| 159 | } | ||
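A sketch of the two paths above with hypothetical operands: when count and offset are both immediates (say 4 and 8), the packed-constant form is emitted,

    BFI.S R0,{4,8,0,0},R1,R2;

otherwise the two values are first staged through the RC scratch register:

    MOV.S RC.x,R3.x;
    MOV.S RC.y,R4.x;
    BFI.S R0,RC,R1,R2;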
| 160 | |||
| 161 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 162 | ScalarS32 count) { | ||
| 163 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 164 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 165 | ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 166 | } else { | ||
| 167 | ctx.Add("MOV.S RC.x,{};" | ||
| 168 | "MOV.S RC.y,{};" | ||
| 169 | "BFE.S {},RC,{};", | ||
| 170 | count, offset, ret, base); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 175 | ScalarU32 count) { | ||
| 176 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 177 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 178 | if (zero) { | ||
| 179 | zero->Invalidate(); | ||
| 180 | } | ||
| 181 | if (sign) { | ||
| 182 | sign->Invalidate(); | ||
| 183 | } | ||
| 184 | if (zero || sign) { | ||
| 185 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 186 | } | ||
| 187 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 188 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 189 | ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 190 | } else { | ||
| 191 | ctx.Add("MOV.U RC.x,{};" | ||
| 192 | "MOV.U RC.y,{};" | ||
| 193 | "BFE.U {},RC,{};", | ||
| 194 | count, offset, ret, base); | ||
| 195 | } | ||
| 196 | if (zero) { | ||
| 197 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 198 | } | ||
| 199 | if (sign) { | ||
| 200 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 205 | ctx.Add("BFR {},{};", inst, value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 209 | ctx.Add("BTC {},{};", inst, value); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 213 | ctx.Add("NOT.S {},{};", inst, value); | ||
| 214 | } | ||
| 215 | |||
| 216 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 217 | ctx.Add("BTFM.S {},{};", inst, value); | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 221 | ctx.Add("BTFM.U {},{};", inst, value); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 225 | ctx.Add("MIN.S {},{},{};", inst, a, b); | ||
| 226 | } | ||
| 227 | |||
| 228 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 229 | ctx.Add("MIN.U {},{},{};", inst, a, b); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 233 | ctx.Add("MAX.S {},{},{};", inst, a, b); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 237 | ctx.Add("MAX.U {},{},{};", inst, a, b); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { | ||
| 241 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 242 | ctx.Add("MIN.S RC.x,{},{};" | ||
| 243 | "MAX.S {}.x,RC.x,{};", | ||
| 244 | max, value, ret, min); | ||
| 245 | } | ||
| 246 | |||
| 247 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { | ||
| 248 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 249 | ctx.Add("MIN.U RC.x,{},{};" | ||
| 250 | "MAX.U {}.x,RC.x,{};", | ||
| 251 | max, value, ret, min); | ||
| 252 | } | ||
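Both clamps lower to a MIN into the RC scratch register followed by a MAX, i.e. result = max(min(value, max), min). Worked example: clamping 9 into [0, 5] computes min(9, 5) = 5, then max(5, 0) = 5.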
| 253 | |||
| 254 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 255 | ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 256 | } | ||
| 257 | |||
| 258 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 259 | ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 263 | ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); | ||
| 264 | } | ||
| 265 | |||
| 266 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 267 | ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 268 | } | ||
| 269 | |||
| 270 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 271 | ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 275 | ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 279 | ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 280 | } | ||
| 281 | |||
| 282 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 283 | ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 284 | } | ||
| 285 | |||
| 286 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 287 | ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 291 | ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 292 | } | ||
| 293 | |||
| 294 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -0,0 +1,568 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 16 | std::string_view then_expr, std::string_view else_expr = {}) { | ||
| 17 | // Operate on the bindless SSBO, invoking the expression with bounds checking | ||
| 18 | // address = c[binding].xy | ||
| 19 | // length = c[binding].z | ||
| 20 | const u32 sb_binding{binding.U32()}; | ||
| 21 | ctx.Add("PK64.U DC,c[{}];" // pointer = address | ||
| 22 | "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) | ||
| 23 | "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset | ||
| 24 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length | ||
| 25 | sb_binding, offset, offset, sb_binding); | ||
| 26 | if (else_expr.empty()) { | ||
| 27 | ctx.Add("IF NE.x;{}ENDIF;", then_expr); | ||
| 28 | } else { | ||
| 29 | ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); | ||
| 30 | } | ||
| 31 | } | ||
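An illustrative expansion for binding 0 with a hypothetical offset register R0.x, obtained by substituting into the format string above (then/else bodies elided):

    PK64.U DC,c[0];             # pointer = ssbo address
    CVT.U64.U32 DC.z,R0.x;      # widen the 32-bit offset
    ADD.U64 DC.x,DC.x,DC.z;     # pointer += offset
    SLT.U.CC RC.x,R0.x,c[0].z;  # condition code: offset < length
    IF NE.x; ... ENDIF;         # or IF/ELSE/ENDIF when else_expr is given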
| 32 | |||
| 33 | void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, | ||
| 34 | std::string_view else_expr = {}) { | ||
| 35 | const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; | ||
| 36 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 37 | if (!ctx.info.nvn_buffer_used[index]) { | ||
| 38 | continue; | ||
| 39 | } | ||
| 40 | const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; | ||
| 41 | ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr | ||
| 42 | "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 | ||
| 43 | "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 | ||
| 44 | "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size | ||
| 45 | "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 | ||
| 46 | "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0 | ||
| 47 | "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b | ||
| 48 | "IF NE.x;" // if cond | ||
| 49 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr | ||
| 50 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, | ||
| 51 | address, address); | ||
| 52 | if (pointer_based) { | ||
| 53 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf | ||
| 54 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||
| 55 | "{}" | ||
| 56 | "ELSE;", | ||
| 57 | index, expr); | ||
| 58 | } else { | ||
| 59 | ctx.Add("CVT.U32.U64 RC.x,DC.x;" | ||
| 60 | "{},ssbo{}[RC.x];" | ||
| 61 | "ELSE;", | ||
| 62 | expr, index); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | if (!else_expr.empty()) { | ||
| 66 | ctx.Add("{}", else_expr); | ||
| 67 | } | ||
| 68 | const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; | ||
| 69 | for (size_t index = 0; index < num_used_buffers; ++index) { | ||
| 70 | ctx.Add("ENDIF;"); | ||
| 71 | } | ||
| 72 | } | ||
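Control-flow note: each used SSBO opens one IF whose ELSE arm nests the range check for the next buffer, so else_expr (when present) runs only if the address missed every buffer's range; the trailing loop over nvn_buffer_used.count() then emits exactly one ENDIF per opened IF.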
| 73 | |||
| 74 | template <typename ValueType> | ||
| 75 | void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, | ||
| 76 | std::string_view size) { | ||
| 77 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 78 | ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); | ||
| 79 | } else { | ||
| 80 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 85 | std::string_view size) { | ||
| 86 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 87 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 88 | ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); | ||
| 89 | } else { | ||
| 90 | StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 91 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | template <typename ValueType> | ||
| 96 | void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { | ||
| 97 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 98 | GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); | ||
| 99 | } else { | ||
| 100 | GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { | ||
| 105 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 106 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 107 | GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); | ||
| 108 | } else { | ||
| 109 | GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 110 | fmt::format("MOV.S {},0;", ret)); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | template <typename ValueType> | ||
| 115 | void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 116 | ValueType value, std::string_view operation, std::string_view size) { | ||
| 117 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 118 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 119 | ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), | ||
| 120 | offset); | ||
| 121 | } else { | ||
| 122 | StorageOp(ctx, binding, offset, | ||
| 123 | fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | } // Anonymous namespace | ||
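For concreteness (binding 0, hypothetical registers): with glasm_use_storage_buffers set, an EmitStorageAtomicIAdd32 call below reduces via Atom to

    ATOMB.ADD.U32 R0,R1.x,ssbo0[R2.x];

while the fallback path wraps the equivalent pointer form, ATOM.ADD.U32 R0,R1.x,DC.x;, inside StorageOp's bounds check. Write and Load dispatch between STB/LDB and STORE/LOAD in the same way.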
| 127 | |||
| 128 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 129 | GlobalLoad(ctx, inst, address, "U8"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 133 | GlobalLoad(ctx, inst, address, "S8"); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 137 | GlobalLoad(ctx, inst, address, "U16"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 141 | GlobalLoad(ctx, inst, address, "S16"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 145 | GlobalLoad(ctx, inst, address, "U32"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 149 | GlobalLoad(ctx, inst, address, "U32X2"); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 153 | GlobalLoad(ctx, inst, address, "U32X4"); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { | ||
| 157 | GlobalWrite(ctx, address, value, "U8"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { | ||
| 161 | GlobalWrite(ctx, address, value, "S8"); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { | ||
| 165 | GlobalWrite(ctx, address, value, "U16"); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { | ||
| 169 | GlobalWrite(ctx, address, value, "S16"); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { | ||
| 173 | GlobalWrite(ctx, address, value, "U32"); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { | ||
| 177 | GlobalWrite(ctx, address, value, "U32X2"); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { | ||
| 181 | GlobalWrite(ctx, address, value, "U32X4"); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 185 | ScalarU32 offset) { | ||
| 186 | Load(ctx, inst, binding, offset, "U8"); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 190 | ScalarU32 offset) { | ||
| 191 | Load(ctx, inst, binding, offset, "S8"); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 195 | ScalarU32 offset) { | ||
| 196 | Load(ctx, inst, binding, offset, "U16"); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 200 | ScalarU32 offset) { | ||
| 201 | Load(ctx, inst, binding, offset, "S16"); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 205 | ScalarU32 offset) { | ||
| 206 | Load(ctx, inst, binding, offset, "U32"); | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 210 | ScalarU32 offset) { | ||
| 211 | Load(ctx, inst, binding, offset, "U32X2"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 215 | ScalarU32 offset) { | ||
| 216 | Load(ctx, inst, binding, offset, "U32X4"); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 220 | ScalarU32 value) { | ||
| 221 | Write(ctx, binding, offset, value, "U8"); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 225 | ScalarS32 value) { | ||
| 226 | Write(ctx, binding, offset, value, "S8"); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 230 | ScalarU32 value) { | ||
| 231 | Write(ctx, binding, offset, value, "U16"); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 235 | ScalarS32 value) { | ||
| 236 | Write(ctx, binding, offset, value, "S16"); | ||
| 237 | } | ||
| 238 | |||
| 239 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 240 | ScalarU32 value) { | ||
| 241 | Write(ctx, binding, offset, value, "U32"); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 245 | Register value) { | ||
| 246 | Write(ctx, binding, offset, value, "U32X2"); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 250 | Register value) { | ||
| 251 | Write(ctx, binding, offset, value, "U32X4"); | ||
| 252 | } | ||
| 253 | |||
| 254 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 255 | ScalarU32 value) { | ||
| 256 | ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 260 | ScalarS32 value) { | ||
| 261 | ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 265 | ScalarU32 value) { | ||
| 266 | ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 267 | } | ||
| 268 | |||
| 269 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 270 | ScalarS32 value) { | ||
| 271 | ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 275 | ScalarU32 value) { | ||
| 276 | ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 277 | } | ||
| 278 | |||
| 279 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 280 | ScalarU32 value) { | ||
| 281 | ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 282 | } | ||
| 283 | |||
| 284 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 285 | ScalarU32 value) { | ||
| 286 | ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 287 | } | ||
| 288 | |||
| 289 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 290 | ScalarU32 value) { | ||
| 291 | ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 295 | ScalarU32 value) { | ||
| 296 | ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 300 | ScalarU32 value) { | ||
| 301 | ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 302 | } | ||
| 303 | |||
| 304 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 305 | ScalarU32 value) { | ||
| 306 | ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 307 | } | ||
| 308 | |||
| 309 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 310 | Register value) { | ||
| 311 | ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 312 | } | ||
| 313 | |||
| 314 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 315 | ScalarU32 offset, ScalarU32 value) { | ||
| 316 | Atom(ctx, inst, binding, offset, value, "ADD", "U32"); | ||
| 317 | } | ||
| 318 | |||
| 319 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 320 | ScalarU32 offset, ScalarS32 value) { | ||
| 321 | Atom(ctx, inst, binding, offset, value, "MIN", "S32"); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 325 | ScalarU32 offset, ScalarU32 value) { | ||
| 326 | Atom(ctx, inst, binding, offset, value, "MIN", "U32"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 330 | ScalarU32 offset, ScalarS32 value) { | ||
| 331 | Atom(ctx, inst, binding, offset, value, "MAX", "S32"); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 335 | ScalarU32 offset, ScalarU32 value) { | ||
| 336 | Atom(ctx, inst, binding, offset, value, "MAX", "U32"); | ||
| 337 | } | ||
| 338 | |||
| 339 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 340 | ScalarU32 offset, ScalarU32 value) { | ||
| 341 | Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); | ||
| 342 | } | ||
| 343 | |||
| 344 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 345 | ScalarU32 offset, ScalarU32 value) { | ||
| 346 | Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); | ||
| 347 | } | ||
| 348 | |||
| 349 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 350 | ScalarU32 offset, ScalarU32 value) { | ||
| 351 | Atom(ctx, inst, binding, offset, value, "AND", "U32"); | ||
| 352 | } | ||
| 353 | |||
| 354 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 355 | ScalarU32 offset, ScalarU32 value) { | ||
| 356 | Atom(ctx, inst, binding, offset, value, "OR", "U32"); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 360 | ScalarU32 offset, ScalarU32 value) { | ||
| 361 | Atom(ctx, inst, binding, offset, value, "XOR", "U32"); | ||
| 362 | } | ||
| 363 | |||
| 364 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 365 | ScalarU32 offset, ScalarU32 value) { | ||
| 366 | Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 370 | ScalarU32 offset, Register value) { | ||
| 371 | Atom(ctx, inst, binding, offset, value, "ADD", "U64"); | ||
| 372 | } | ||
| 373 | |||
| 374 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 375 | ScalarU32 offset, Register value) { | ||
| 376 | Atom(ctx, inst, binding, offset, value, "MIN", "S64"); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, Register value) { | ||
| 381 | Atom(ctx, inst, binding, offset, value, "MIN", "U64"); | ||
| 382 | } | ||
| 383 | |||
| 384 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 385 | ScalarU32 offset, Register value) { | ||
| 386 | Atom(ctx, inst, binding, offset, value, "MAX", "S64"); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, Register value) { | ||
| 391 | Atom(ctx, inst, binding, offset, value, "MAX", "U64"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 395 | ScalarU32 offset, Register value) { | ||
| 396 | Atom(ctx, inst, binding, offset, value, "AND", "U64"); | ||
| 397 | } | ||
| 398 | |||
| 399 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value) { | ||
| 401 | Atom(ctx, inst, binding, offset, value, "OR", "U64"); | ||
| 402 | } | ||
| 403 | |||
| 404 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 405 | ScalarU32 offset, Register value) { | ||
| 406 | Atom(ctx, inst, binding, offset, value, "XOR", "U64"); | ||
| 407 | } | ||
| 408 | |||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value) { | ||
| 411 | Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); | ||
| 412 | } | ||
| 413 | |||
| 414 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 415 | ScalarU32 offset, ScalarF32 value) { | ||
| 416 | Atom(ctx, inst, binding, offset, value, "ADD", "F32"); | ||
| 417 | } | ||
| 418 | |||
| 419 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value) { | ||
| 421 | Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); | ||
| 422 | } | ||
| 423 | |||
| 424 | void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 425 | [[maybe_unused]] const IR::Value& binding, | ||
| 426 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 427 | throw NotImplementedException("GLASM instruction"); | ||
| 428 | } | ||
| 429 | |||
| 430 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 431 | ScalarU32 offset, Register value) { | ||
| 432 | Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 436 | [[maybe_unused]] const IR::Value& binding, | ||
| 437 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 438 | throw NotImplementedException("GLASM instruction"); | ||
| 439 | } | ||
| 440 | |||
| 441 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 442 | ScalarU32 offset, Register value) { | ||
| 443 | Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); | ||
| 444 | } | ||
| 445 | |||
| 446 | void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 447 | [[maybe_unused]] const IR::Value& binding, | ||
| 448 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 449 | throw NotImplementedException("GLASM instruction"); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 453 | throw NotImplementedException("GLASM instruction"); | ||
| 454 | } | ||
| 455 | |||
| 456 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 457 | throw NotImplementedException("GLASM instruction"); | ||
| 458 | } | ||
| 459 | |||
| 460 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 461 | throw NotImplementedException("GLASM instruction"); | ||
| 462 | } | ||
| 463 | |||
| 464 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 465 | throw NotImplementedException("GLASM instruction"); | ||
| 466 | } | ||
| 467 | |||
| 468 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 469 | throw NotImplementedException("GLASM instruction"); | ||
| 470 | } | ||
| 471 | |||
| 472 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 473 | throw NotImplementedException("GLASM instruction"); | ||
| 474 | } | ||
| 475 | |||
| 476 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 477 | throw NotImplementedException("GLASM instruction"); | ||
| 478 | } | ||
| 479 | |||
| 480 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 481 | throw NotImplementedException("GLASM instruction"); | ||
| 482 | } | ||
| 483 | |||
| 484 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 485 | throw NotImplementedException("GLASM instruction"); | ||
| 486 | } | ||
| 487 | |||
| 488 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 489 | throw NotImplementedException("GLASM instruction"); | ||
| 490 | } | ||
| 491 | |||
| 492 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 493 | throw NotImplementedException("GLASM instruction"); | ||
| 494 | } | ||
| 495 | |||
| 496 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 497 | throw NotImplementedException("GLASM instruction"); | ||
| 498 | } | ||
| 499 | |||
| 500 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 501 | throw NotImplementedException("GLASM instruction"); | ||
| 502 | } | ||
| 503 | |||
| 504 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 505 | throw NotImplementedException("GLASM instruction"); | ||
| 506 | } | ||
| 507 | |||
| 508 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 509 | throw NotImplementedException("GLASM instruction"); | ||
| 510 | } | ||
| 511 | |||
| 512 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 513 | throw NotImplementedException("GLASM instruction"); | ||
| 514 | } | ||
| 515 | |||
| 516 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 517 | throw NotImplementedException("GLASM instruction"); | ||
| 518 | } | ||
| 519 | |||
| 520 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 521 | throw NotImplementedException("GLASM instruction"); | ||
| 522 | } | ||
| 523 | |||
| 524 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 525 | throw NotImplementedException("GLASM instruction"); | ||
| 526 | } | ||
| 527 | |||
| 528 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 529 | throw NotImplementedException("GLASM instruction"); | ||
| 530 | } | ||
| 531 | |||
| 532 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 533 | throw NotImplementedException("GLASM instruction"); | ||
| 534 | } | ||
| 535 | |||
| 536 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 537 | throw NotImplementedException("GLASM instruction"); | ||
| 538 | } | ||
| 539 | |||
| 540 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 541 | throw NotImplementedException("GLASM instruction"); | ||
| 542 | } | ||
| 543 | |||
| 544 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 545 | throw NotImplementedException("GLASM instruction"); | ||
| 546 | } | ||
| 547 | |||
| 548 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 549 | throw NotImplementedException("GLASM instruction"); | ||
| 550 | } | ||
| 551 | |||
| 552 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 553 | throw NotImplementedException("GLASM instruction"); | ||
| 554 | } | ||
| 555 | |||
| 556 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 557 | throw NotImplementedException("GLASM instruction"); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 561 | throw NotImplementedException("GLASM instruction"); | ||
| 562 | } | ||
| 563 | |||
| 564 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 565 | throw NotImplementedException("GLASM instruction"); | ||
| 566 | } | ||
| 567 | |||
| 568 | } // namespace Shader::Backend::GLASM | ||
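Every EmitStorageAtomic* wrapper above funnels through a single Atom helper defined earlier in emit_glasm_memory.cpp, outside this excerpt, which formats one ATOM.<op>.<type> instruction against the bound SSBO. A minimal sketch of that dispatch pattern, assuming the fmt-style EmitContext::Add used throughout this backend and an immediate binding index; the ssbo naming and the helper's exact signature are assumptions here:

    // Hedged sketch, not the upstream helper: the real Atom also handles
    // non-immediate bindings and the 64-bit / F16x2 result variants.
    template <typename ValueType>
    void AtomSketch(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                    ScalarU32 offset, ValueType value, std::string_view op,
                    std::string_view type) {
        // e.g. op="ADD", type="U32" -> "ATOM.ADD.U32 R0.x,R1.x,ssbo0[R2.x];"
        ctx.Add("ATOM.{}.{} {},{},ssbo{}[{}];", op, type, inst, value,
                binding.U32(), offset);
    }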
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | #ifdef _MSC_VER | ||
| 13 | #pragma warning(disable : 4100) | ||
| 14 | #endif | ||
| 15 | |||
| 16 | namespace Shader::Backend::GLASM { | ||
| 17 | |||
| 18 | #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) | ||
| 19 | |||
| 20 | static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 21 | switch (phi.Arg(0).Type()) { | ||
| 22 | case IR::Type::U1: | ||
| 23 | case IR::Type::U32: | ||
| 24 | case IR::Type::F32: | ||
| 25 | ctx.reg_alloc.Define(phi); | ||
| 26 | break; | ||
| 27 | case IR::Type::U64: | ||
| 28 | case IR::Type::F64: | ||
| 29 | ctx.reg_alloc.LongDefine(phi); | ||
| 30 | break; | ||
| 31 | default: | ||
| 32 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 37 | const size_t num_args{phi.NumArgs()}; | ||
| 38 | for (size_t i = 0; i < num_args; ++i) { | ||
| 39 | ctx.reg_alloc.Consume(phi.Arg(i)); | ||
| 40 | } | ||
| 41 | if (!phi.Definition<Id>().is_valid) { | ||
| 42 | // The phi node was not forward-defined | ||
| 43 | DefinePhi(ctx, phi); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitVoid(EmitContext&) {} | ||
| 48 | |||
| 49 | void EmitReference(EmitContext& ctx, const IR::Value& value) { | ||
| 50 | ctx.reg_alloc.Consume(value); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { | ||
| 54 | IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; | ||
| 55 | if (!phi.Definition<Id>().is_valid) { | ||
| 56 | // The phi node was not forward-defined | ||
| 57 | DefinePhi(ctx, phi); | ||
| 58 | } | ||
| 59 | const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; | ||
| 60 | const Value eval_value{ctx.reg_alloc.Consume(value)}; | ||
| 61 | |||
| 62 | if (phi_reg == eval_value) { | ||
| 63 | return; | ||
| 64 | } | ||
| 65 | switch (phi.Flags<IR::Type>()) { | ||
| 66 | case IR::Type::U1: | ||
| 67 | case IR::Type::U32: | ||
| 68 | case IR::Type::F32: | ||
| 69 | ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); | ||
| 70 | break; | ||
| 71 | case IR::Type::U64: | ||
| 72 | case IR::Type::F64: | ||
| 73 | ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); | ||
| 74 | break; | ||
| 75 | default: | ||
| 76 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 77 | } | ||
| 78 | } | ||
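A phi therefore emits no code of its own: its register is allocated lazily by whichever of EmitPhi or EmitPhiMove reaches it first, and every predecessor block copies its incoming value into that register, skipping the MOV when the value already lives there. A small host-side model of that elision, with made-up register numbers:

    #include <cassert>

    int main() {
        const int phi_reg = 3; // register lazily assigned to the phi node
        int emitted_movs = 0;
        const auto phi_move = [&](int value_reg) {
            if (value_reg == phi_reg) {
                return; // source already lives in the phi register: elide the MOV
            }
            ++emitted_movs; // stands in for "MOV.S R3.x,R<value>.x;"
        };
        phi_move(3); // predecessor whose value coalesced into the phi register
        phi_move(5); // predecessor that genuinely needs a copy
        assert(emitted_movs == 1);
    }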
| 79 | |||
| 80 | void EmitJoin(EmitContext& ctx) { | ||
| 81 | NotImplemented(); | ||
| 82 | } | ||
| 83 | |||
| 84 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 85 | ctx.Add("KIL TR.x;"); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitBarrier(EmitContext& ctx) { | ||
| 89 | ctx.Add("BAR;"); | ||
| 90 | } | ||
| 91 | |||
| 92 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 93 | ctx.Add("MEMBAR.CTA;"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 97 | ctx.Add("MEMBAR;"); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitPrologue(EmitContext& ctx) { | ||
| 101 | // TODO | ||
| 102 | } | ||
| 103 | |||
| 104 | void EmitEpilogue(EmitContext& ctx) { | ||
| 105 | // TODO | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { | ||
| 109 | if (stream.type == Type::U32 && stream.imm_u32 == 0) { | ||
| 110 | ctx.Add("EMIT;"); | ||
| 111 | } else { | ||
| 112 | ctx.Add("EMITS {};", stream); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 117 | if (!stream.IsImmediate()) { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Stream is not immediate"); | ||
| 119 | } | ||
| 120 | ctx.reg_alloc.Consume(stream); | ||
| 121 | ctx.Add("ENDPRIM;"); | ||
| 122 | } | ||
| 123 | |||
| 124 | void EmitGetRegister(EmitContext& ctx) { | ||
| 125 | NotImplemented(); | ||
| 126 | } | ||
| 127 | |||
| 128 | void EmitSetRegister(EmitContext& ctx) { | ||
| 129 | NotImplemented(); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitGetPred(EmitContext& ctx) { | ||
| 133 | NotImplemented(); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitSetPred(EmitContext& ctx) { | ||
| 137 | NotImplemented(); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitSetGotoVariable(EmitContext& ctx) { | ||
| 141 | NotImplemented(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitGetGotoVariable(EmitContext& ctx) { | ||
| 145 | NotImplemented(); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitSetIndirectBranchVariable(EmitContext& ctx) { | ||
| 149 | NotImplemented(); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitGetIndirectBranchVariable(EmitContext& ctx) { | ||
| 153 | NotImplemented(); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitGetZFlag(EmitContext& ctx) { | ||
| 157 | NotImplemented(); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitGetSFlag(EmitContext& ctx) { | ||
| 161 | NotImplemented(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitGetCFlag(EmitContext& ctx) { | ||
| 165 | NotImplemented(); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitGetOFlag(EmitContext& ctx) { | ||
| 169 | NotImplemented(); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitSetZFlag(EmitContext& ctx) { | ||
| 173 | NotImplemented(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitSetSFlag(EmitContext& ctx) { | ||
| 177 | NotImplemented(); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitSetCFlag(EmitContext& ctx) { | ||
| 181 | NotImplemented(); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitSetOFlag(EmitContext& ctx) { | ||
| 185 | NotImplemented(); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { | ||
| 189 | ctx.Add("MOV.S {},invocation.groupid;", inst); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 193 | ctx.Add("MOV.S {},invocation.localid;", inst); | ||
| 194 | } | ||
| 195 | |||
| 196 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 197 | ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { | ||
| 201 | ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { | ||
| 205 | ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | ||
| 209 | ctx.uses_y_direction = true; | ||
| 210 | ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); | ||
| 211 | } | ||
| 212 | |||
| 213 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | ||
| 214 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 215 | } | ||
| 216 | |||
| 217 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { | ||
| 218 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { | ||
| 222 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { | ||
| 226 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { | ||
| 230 | ctx.LongAdd("MOV.S64 {}.x,0;", inst); | ||
| 231 | } | ||
| 232 | |||
| 233 | void EmitGetZeroFromOp(EmitContext& ctx) { | ||
| 234 | NotImplemented(); | ||
| 235 | } | ||
| 236 | |||
| 237 | void EmitGetSignFromOp(EmitContext& ctx) { | ||
| 238 | NotImplemented(); | ||
| 239 | } | ||
| 240 | |||
| 241 | void EmitGetCarryFromOp(EmitContext& ctx) { | ||
| 242 | NotImplemented(); | ||
| 243 | } | ||
| 244 | |||
| 245 | void EmitGetOverflowFromOp(EmitContext& ctx) { | ||
| 246 | NotImplemented(); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitGetSparseFromOp(EmitContext& ctx) { | ||
| 250 | NotImplemented(); | ||
| 251 | } | ||
| 252 | |||
| 253 | void EmitGetInBoundsFromOp(EmitContext& ctx) { | ||
| 254 | NotImplemented(); | ||
| 255 | } | ||
| 256 | |||
| 257 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 258 | ctx.Add("OR.S {},{},{};", inst, a, b); | ||
| 259 | } | ||
| 260 | |||
| 261 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 262 | ctx.Add("AND.S {},{},{};", inst, a, b); | ||
| 263 | } | ||
| 264 | |||
| 265 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 266 | ctx.Add("XOR.S {},{},{};", inst, a, b); | ||
| 267 | } | ||
| 268 | |||
| 269 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 270 | ctx.Add("SEQ.S {},{},0;", inst, value); | ||
| 271 | } | ||
| 272 | |||
| 273 | } // namespace Shader::Backend::GLASM | ||
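The logical emitters above depend on this backend's boolean encoding: IR U1 values become the 32-bit masks 0 and 0xFFFFFFFF (see RegAlloc::MakeImm later in this diff), so the bitwise OR.S/AND.S/XOR.S instructions implement the logical operators exactly, and SEQ.S x,0 (set-if-equal-to-zero) implements NOT. A self-contained check of that equivalence:

    #include <cstdint>

    constexpr std::uint32_t T = 0xFFFFFFFFu; // GLASM "true" mask
    constexpr std::uint32_t F = 0u;          // GLASM "false"

    constexpr std::uint32_t Seq0(std::uint32_t x) { // models "SEQ.S dst,x,0;"
        return x == 0 ? T : F;
    }

    static_assert((T | F) == T && (F | F) == F); // OR.S  behaves as logical or
    static_assert((T & T) == T && (T & F) == F); // AND.S behaves as logical and
    static_assert((T ^ T) == F && (T ^ F) == T); // XOR.S behaves as logical xor
    static_assert(Seq0(F) == T && Seq0(T) == F); // SEQ.S x,0 behaves as logical not

    int main() {}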
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 13 | ScalarS32 false_value) { | ||
| 14 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 18 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 19 | throw NotImplementedException("GLASM instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 23 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 24 | throw NotImplementedException("GLASM instruction"); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 28 | ScalarS32 false_value) { | ||
| 29 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 33 | Register false_value) { | ||
| 34 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 35 | const Register ret{ctx.reg_alloc.LongDefine(inst)}; | ||
| 36 | if (ret == true_value) { | ||
| 37 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 38 | "MOV.U64 {}.x(EQ.x),{};", | ||
| 39 | cond, ret, false_value); | ||
| 40 | } else if (ret == false_value) { | ||
| 41 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 42 | "MOV.U64 {}.x(NE.x),{};", | ||
| 43 | cond, ret, true_value); | ||
| 44 | } else { | ||
| 45 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 46 | "MOV.U64 {}.x,{};" | ||
| 47 | "MOV.U64 {}.x(NE.x),{};", | ||
| 48 | cond, ret, false_value, ret, true_value); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 53 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 54 | throw NotImplementedException("GLASM instruction"); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 58 | ScalarS32 false_value) { | ||
| 59 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 63 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 64 | throw NotImplementedException("GLASM instruction"); | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace Shader::Backend::GLASM | ||
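The three scalar selects above rely on ARB assembly's CMP semantics: CMP dst,a,b,c writes b where a is negative and c otherwise. Since the true mask 0xFFFFFFFF reads as -1 when interpreted as S32, the condition can be fed to CMP.S directly. A small model under those semantics:

    #include <cstdint>

    // Models the per-component semantics of "CMP.S dst,cond,true_value,false_value;".
    constexpr std::int32_t CmpS(std::int32_t cond, std::int32_t t, std::int32_t f) {
        return cond < 0 ? t : f;
    }

    static_assert(CmpS(-1, 10, 20) == 10); // true mask (0xFFFFFFFF) selects true_value
    static_assert(CmpS(0, 10, 20) == 20);  // false (0) selects false_value

    int main() {}

EmitSelectU64 cannot use CMP on a 64-bit register pair, hence its MOV.S.CC plus conditional MOV.U64 sequence, which also avoids clobbering an input when the destination register aliases either operand.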
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 12 | ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); | ||
| 13 | } | ||
| 14 | |||
| 15 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 16 | ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); | ||
| 17 | } | ||
| 18 | |||
| 19 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 20 | ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); | ||
| 21 | } | ||
| 22 | |||
| 23 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 24 | ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 28 | ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); | ||
| 29 | } | ||
| 30 | |||
| 31 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 32 | ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 36 | ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 40 | ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 44 | ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 48 | ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 52 | ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 56 | ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); | ||
| 57 | } | ||
| 58 | } // namespace Shader::Backend::GLASM | ||
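The load/store pairs above differ only in access width; the shared_mem offsets are byte addresses (the IR works in bytes), which is why separate zero-extending (U8/U16) and sign-extending (S8/S16) loads exist even though every result lands in a 32-bit register. A host-side model of the 8-bit pair:

    #include <cassert>
    #include <cstdint>

    int main() {
        unsigned char shared_mem[16]{};
        shared_mem[4] = 0xF0;
        const std::uint32_t lds_u8 = shared_mem[4]; // LDS.U8 -> 240 (zero-extended)
        const std::int32_t lds_s8 =
            static_cast<signed char>(shared_mem[4]); // LDS.S8 -> -16 (sign-extended)
        assert(lds_u8 == 240 && lds_s8 == -16);
    }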
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | ||
| 13 | ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); | ||
| 14 | } | ||
| 15 | |||
| 16 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 17 | ctx.Add("TGALL.S {}.x,{};", inst, pred); | ||
| 18 | } | ||
| 19 | |||
| 20 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 21 | ctx.Add("TGANY.S {}.x,{};", inst, pred); | ||
| 22 | } | ||
| 23 | |||
| 24 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 25 | ctx.Add("TGEQ.S {}.x,{};", inst, pred); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 29 | ctx.Add("TGBALLOT {}.x,{};", inst, pred); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 33 | ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 37 | ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 41 | ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 45 | ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 49 | ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); | ||
| 50 | } | ||
| 51 | |||
| 52 | static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 53 | const IR::Value& clamp, const IR::Value& segmentation_mask, | ||
| 54 | std::string_view op) { | ||
| 55 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 56 | if (in_bounds) { | ||
| 57 | in_bounds->Invalidate(); | ||
| 58 | } | ||
| 59 | std::string mask; | ||
| 60 | if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { | ||
| 61 | mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); | ||
| 62 | } else { | ||
| 63 | mask = "RC"; | ||
| 64 | ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", | ||
| 65 | ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, | ||
| 66 | ScalarU32{ctx.reg_alloc.Consume(clamp)}); | ||
| 67 | } | ||
| 68 | const Register value_ret{ctx.reg_alloc.Define(inst)}; | ||
| 69 | if (in_bounds) { | ||
| 70 | const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; | ||
| 71 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 72 | "MOV.U {}.x,{}.y;", | ||
| 73 | op, bounds_ret, value, index, mask, value_ret, bounds_ret); | ||
| 74 | } else { | ||
| 75 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 76 | "MOV.U {}.x,{}.y;", | ||
| 77 | op, value_ret, value, index, mask, value_ret, value_ret); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 82 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 83 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 87 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 88 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 92 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 93 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 97 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 98 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 102 | ScalarU32 swizzle) { | ||
| 103 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 104 | ctx.Add("AND.U RC.z,{}.threadid,3;" | ||
| 105 | "SHL.U RC.z,RC.z,1;" | ||
| 106 | "SHR.U RC.z,{},RC.z;" | ||
| 107 | "AND.U RC.z,RC.z,3;" | ||
| 108 | "MUL.F RC.x,{},FSWZA[RC.z];" | ||
| 109 | "MUL.F RC.y,{},FSWZB[RC.z];" | ||
| 110 | "ADD.F {}.x,RC.x,RC.y;", | ||
| 111 | ctx.stage_name, swizzle, op_a, op_b, ret); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 115 | if (ctx.profile.support_derivative_control) { | ||
| 116 | ctx.Add("DDX.FINE {}.x,{};", inst, p); | ||
| 117 | } else { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 119 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 124 | if (ctx.profile.support_derivative_control) { | ||
| 125 | ctx.Add("DDY.FINE {}.x,{};", inst, p); | ||
| 126 | } else { | ||
| 127 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 128 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 133 | if (ctx.profile.support_derivative_control) { | ||
| 134 | ctx.Add("DDX.COARSE {}.x,{};", inst, p); | ||
| 135 | } else { | ||
| 136 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 137 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 142 | if (ctx.profile.support_derivative_control) { | ||
| 143 | ctx.Add("DDY.COARSE {}.x,{};", inst, p); | ||
| 144 | } else { | ||
| 145 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 146 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace Shader::Backend::GLASM | ||
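The Shuffle helper above packs SHF*'s mask operand from two IR values: the lane clamp in the low bits and the segmentation mask shifted up by 8, either as the immediate clamp | (segmentation_mask << 8) or, for dynamic values, with BFI.U. Reading the {5,8,0,0} operand as a 5-bit-wide insert at offset 8 (the interpretation that matches the immediate path), a host-side equivalence check:

    #include <cstdint>

    // Models "BFI.U dst,{width,offset,0,0},src,base;" as used by Shuffle above.
    constexpr std::uint32_t Bfi(std::uint32_t width, std::uint32_t offset,
                                std::uint32_t src, std::uint32_t base) {
        const std::uint32_t field = ((1u << width) - 1u) << offset;
        return (base & ~field) | ((src << offset) & field);
    }

    static_assert(Bfi(5, 8, 0x03, 0x1f) == (0x1f | (0x03u << 8)));

    int main() {}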
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 10 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLASM { | ||
| 15 | |||
| 16 | Register RegAlloc::Define(IR::Inst& inst) { | ||
| 17 | return Define(inst, false); | ||
| 18 | } | ||
| 19 | |||
| 20 | Register RegAlloc::LongDefine(IR::Inst& inst) { | ||
| 21 | return Define(inst, true); | ||
| 22 | } | ||
| 23 | |||
| 24 | Value RegAlloc::Peek(const IR::Value& value) { | ||
| 25 | if (value.IsImmediate()) { | ||
| 26 | return MakeImm(value); | ||
| 27 | } else { | ||
| 28 | return PeekInst(*value.Inst()); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | Value RegAlloc::Consume(const IR::Value& value) { | ||
| 33 | if (value.IsImmediate()) { | ||
| 34 | return MakeImm(value); | ||
| 35 | } else { | ||
| 36 | return ConsumeInst(*value.Inst()); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | void RegAlloc::Unref(IR::Inst& inst) { | ||
| 41 | IR::Inst& value_inst{AliasInst(inst)}; | ||
| 42 | value_inst.DestructiveRemoveUsage(); | ||
| 43 | if (!value_inst.HasUses()) { | ||
| 44 | Free(value_inst.Definition<Id>()); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | Register RegAlloc::AllocReg() { | ||
| 49 | Register ret; | ||
| 50 | ret.type = Type::Register; | ||
| 51 | ret.id = Alloc(false); | ||
| 52 | return ret; | ||
| 53 | } | ||
| 54 | |||
| 55 | Register RegAlloc::AllocLongReg() { | ||
| 56 | Register ret; | ||
| 57 | ret.type = Type::Register; | ||
| 58 | ret.id = Alloc(true); | ||
| 59 | return ret; | ||
| 60 | } | ||
| 61 | |||
| 62 | void RegAlloc::FreeReg(Register reg) { | ||
| 63 | Free(reg.id); | ||
| 64 | } | ||
| 65 | |||
| 66 | Value RegAlloc::MakeImm(const IR::Value& value) { | ||
| 67 | Value ret; | ||
| 68 | switch (value.Type()) { | ||
| 69 | case IR::Type::Void: | ||
| 70 | ret.type = Type::Void; | ||
| 71 | break; | ||
| 72 | case IR::Type::U1: | ||
| 73 | ret.type = Type::U32; | ||
| 74 | ret.imm_u32 = value.U1() ? 0xffffffff : 0; | ||
| 75 | break; | ||
| 76 | case IR::Type::U32: | ||
| 77 | ret.type = Type::U32; | ||
| 78 | ret.imm_u32 = value.U32(); | ||
| 79 | break; | ||
| 80 | case IR::Type::F32: | ||
| 81 | ret.type = Type::U32; | ||
| 82 | ret.imm_u32 = Common::BitCast<u32>(value.F32()); | ||
| 83 | break; | ||
| 84 | case IR::Type::U64: | ||
| 85 | ret.type = Type::U64; | ||
| 86 | ret.imm_u64 = value.U64(); | ||
| 87 | break; | ||
| 88 | case IR::Type::F64: | ||
| 89 | ret.type = Type::U64; | ||
| 90 | ret.imm_u64 = Common::BitCast<u64>(value.F64()); | ||
| 91 | break; | ||
| 92 | default: | ||
| 93 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 94 | } | ||
| 95 | return ret; | ||
| 96 | } | ||
| 97 | |||
| 98 | Register RegAlloc::Define(IR::Inst& inst, bool is_long) { | ||
| 99 | if (inst.HasUses()) { | ||
| 100 | inst.SetDefinition<Id>(Alloc(is_long)); | ||
| 101 | } else { | ||
| 102 | Id id{}; | ||
| 103 | id.is_long.Assign(is_long ? 1 : 0); | ||
| 104 | id.is_null.Assign(1); | ||
| 105 | inst.SetDefinition<Id>(id); | ||
| 106 | } | ||
| 107 | return Register{PeekInst(inst)}; | ||
| 108 | } | ||
| 109 | |||
| 110 | Value RegAlloc::PeekInst(IR::Inst& inst) { | ||
| 111 | Value ret; | ||
| 112 | ret.type = Type::Register; | ||
| 113 | ret.id = inst.Definition<Id>(); | ||
| 114 | return ret; | ||
| 115 | } | ||
| 116 | |||
| 117 | Value RegAlloc::ConsumeInst(IR::Inst& inst) { | ||
| 118 | Unref(inst); | ||
| 119 | return PeekInst(inst); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id RegAlloc::Alloc(bool is_long) { | ||
| 123 | size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; | ||
| 124 | std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; | ||
| 125 | if (num_used_registers + num_used_long_registers < NUM_REGS) { | ||
| 126 | for (size_t reg = 0; reg < NUM_REGS; ++reg) { | ||
| 127 | if (use[reg]) { | ||
| 128 | continue; | ||
| 129 | } | ||
| 130 | num_regs = std::max(num_regs, reg + 1); | ||
| 131 | use[reg] = true; | ||
| 132 | Id ret{}; | ||
| 133 | ret.is_valid.Assign(1); | ||
| 134 | ret.is_long.Assign(is_long ? 1 : 0); | ||
| 135 | ret.is_spill.Assign(0); | ||
| 136 | ret.is_condition_code.Assign(0); | ||
| 137 | ret.is_null.Assign(0); | ||
| 138 | ret.index.Assign(static_cast<u32>(reg)); | ||
| 139 | return ret; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | throw NotImplementedException("Register spilling"); | ||
| 143 | } | ||
| 144 | |||
| 145 | void RegAlloc::Free(Id id) { | ||
| 146 | if (id.is_valid == 0) { | ||
| 147 | throw LogicError("Freeing invalid register"); | ||
| 148 | } | ||
| 149 | if (id.is_spill != 0) { | ||
| 150 | throw NotImplementedException("Free spill"); | ||
| 151 | } | ||
| 152 | if (id.is_long != 0) { | ||
| 153 | long_register_use[id.index] = false; | ||
| 154 | } else { | ||
| 155 | register_use[id.index] = false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { | ||
| 160 | switch (inst.GetOpcode()) { | ||
| 161 | case IR::Opcode::Identity: | ||
| 162 | case IR::Opcode::BitCastU16F16: | ||
| 163 | case IR::Opcode::BitCastU32F32: | ||
| 164 | case IR::Opcode::BitCastU64F64: | ||
| 165 | case IR::Opcode::BitCastF16U16: | ||
| 166 | case IR::Opcode::BitCastF32U32: | ||
| 167 | case IR::Opcode::BitCastF64U64: | ||
| 168 | return true; | ||
| 169 | default: | ||
| 170 | return false; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | /*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { | ||
| 175 | IR::Inst* it{&inst}; | ||
| 176 | while (IsAliased(*it)) { | ||
| 177 | const IR::Value arg{it->Arg(0)}; | ||
| 178 | if (arg.IsImmediate()) { | ||
| 179 | break; | ||
| 180 | } | ||
| 181 | it = arg.InstRecursive(); | ||
| 182 | } | ||
| 183 | return *it; | ||
| 184 | } | ||
| 185 | |||
| 186 | } // namespace Shader::Backend::GLASM | ||
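Alloc and Free above form a first-fit allocator over fixed bitsets, with num_used_registers/num_used_long_registers tracking a high-water mark the backend can later use to size its temporary declarations; filling all NUM_REGS slots is still a hard failure because spilling is unimplemented. A reduced, runnable model:

    #include <algorithm>
    #include <bitset>
    #include <cassert>
    #include <cstddef>
    #include <stdexcept>

    int main() {
        constexpr std::size_t NUM_REGS = 8; // the real allocator uses 4096
        std::bitset<NUM_REGS> use;
        std::size_t num_regs = 0; // high-water mark, as in RegAlloc::Alloc
        const auto alloc = [&] {
            for (std::size_t reg = 0; reg < NUM_REGS; ++reg) {
                if (!use[reg]) {
                    use[reg] = true;
                    num_regs = std::max(num_regs, reg + 1);
                    return reg;
                }
            }
            throw std::runtime_error("register spilling"); // NotImplemented upstream
        };
        const std::size_t r0 = alloc();
        alloc();               // second register
        use[r0] = false;       // models Free(r0)
        assert(alloc() == r0); // freed slots are reused first-fit
        assert(num_regs == 2); // the high-water mark never shrinks
    }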
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h | |||
| @@ -0,0 +1,303 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "shader_recompiler/exception.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | class Inst; | ||
| 18 | class Value; | ||
| 19 | } // namespace Shader::IR | ||
| 20 | |||
| 21 | namespace Shader::Backend::GLASM { | ||
| 22 | |||
| 23 | class EmitContext; | ||
| 24 | |||
| 25 | enum class Type : u32 { | ||
| 26 | Void, | ||
| 27 | Register, | ||
| 28 | U32, | ||
| 29 | U64, | ||
| 30 | }; | ||
| 31 | |||
| 32 | struct Id { | ||
| 33 | union { | ||
| 34 | u32 raw; | ||
| 35 | BitField<0, 1, u32> is_valid; | ||
| 36 | BitField<1, 1, u32> is_long; | ||
| 37 | BitField<2, 1, u32> is_spill; | ||
| 38 | BitField<3, 1, u32> is_condition_code; | ||
| 39 | BitField<4, 1, u32> is_null; | ||
| 40 | BitField<5, 27, u32> index; | ||
| 41 | }; | ||
| 42 | |||
| 43 | bool operator==(Id rhs) const noexcept { | ||
| 44 | return raw == rhs.raw; | ||
| 45 | } | ||
| 46 | bool operator!=(Id rhs) const noexcept { | ||
| 47 | return !operator==(rhs); | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | static_assert(sizeof(Id) == sizeof(u32)); | ||
| 51 | |||
| 52 | struct Value { | ||
| 53 | Type type; | ||
| 54 | union { | ||
| 55 | Id id; | ||
| 56 | u32 imm_u32; | ||
| 57 | u64 imm_u64; | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool operator==(const Value& rhs) const noexcept { | ||
| 61 | if (type != rhs.type) { | ||
| 62 | return false; | ||
| 63 | } | ||
| 64 | switch (type) { | ||
| 65 | case Type::Void: | ||
| 66 | return true; | ||
| 67 | case Type::Register: | ||
| 68 | return id == rhs.id; | ||
| 69 | case Type::U32: | ||
| 70 | return imm_u32 == rhs.imm_u32; | ||
| 71 | case Type::U64: | ||
| 72 | return imm_u64 == rhs.imm_u64; | ||
| 73 | } | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | bool operator!=(const Value& rhs) const noexcept { | ||
| 77 | return !operator==(rhs); | ||
| 78 | } | ||
| 79 | }; | ||
| 80 | struct Register : Value {}; | ||
| 81 | struct ScalarRegister : Value {}; | ||
| 82 | struct ScalarU32 : Value {}; | ||
| 83 | struct ScalarS32 : Value {}; | ||
| 84 | struct ScalarF32 : Value {}; | ||
| 85 | struct ScalarF64 : Value {}; | ||
| 86 | |||
| 87 | class RegAlloc { | ||
| 88 | public: | ||
| 89 | RegAlloc() = default; | ||
| 90 | |||
| 91 | Register Define(IR::Inst& inst); | ||
| 92 | |||
| 93 | Register LongDefine(IR::Inst& inst); | ||
| 94 | |||
| 95 | [[nodiscard]] Value Peek(const IR::Value& value); | ||
| 96 | |||
| 97 | Value Consume(const IR::Value& value); | ||
| 98 | |||
| 99 | void Unref(IR::Inst& inst); | ||
| 100 | |||
| 101 | [[nodiscard]] Register AllocReg(); | ||
| 102 | |||
| 103 | [[nodiscard]] Register AllocLongReg(); | ||
| 104 | |||
| 105 | void FreeReg(Register reg); | ||
| 106 | |||
| 107 | void InvalidateConditionCodes() { | ||
| 108 | // Condition codes are not tracked yet, so there is nothing to invalidate | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] size_t NumUsedRegisters() const noexcept { | ||
| 112 | return num_used_registers; | ||
| 113 | } | ||
| 114 | |||
| 115 | [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { | ||
| 116 | return num_used_long_registers; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] bool IsEmpty() const noexcept { | ||
| 120 | return register_use.none() && long_register_use.none(); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns true if the instruction is expected to be aliased to another instruction | ||
| 124 | static bool IsAliased(const IR::Inst& inst); | ||
| 125 | |||
| 126 | /// Follows an alias sequence and returns the underlying defining instruction | ||
| 127 | static IR::Inst& AliasInst(IR::Inst& inst); | ||
| 128 | |||
| 129 | private: | ||
| 130 | static constexpr size_t NUM_REGS = 4096; | ||
| 131 | static constexpr size_t NUM_ELEMENTS = 4; | ||
| 132 | |||
| 133 | Value MakeImm(const IR::Value& value); | ||
| 134 | |||
| 135 | Register Define(IR::Inst& inst, bool is_long); | ||
| 136 | |||
| 137 | Value PeekInst(IR::Inst& inst); | ||
| 138 | |||
| 139 | Value ConsumeInst(IR::Inst& inst); | ||
| 140 | |||
| 141 | Id Alloc(bool is_long); | ||
| 142 | |||
| 143 | void Free(Id id); | ||
| 144 | |||
| 145 | size_t num_used_registers{}; | ||
| 146 | size_t num_used_long_registers{}; | ||
| 147 | std::bitset<NUM_REGS> register_use{}; | ||
| 148 | std::bitset<NUM_REGS> long_register_use{}; | ||
| 149 | }; | ||
| 150 | |||
| 151 | template <bool scalar, typename FormatContext> | ||
| 152 | auto FormatTo(FormatContext& ctx, Id id) { | ||
| 153 | if (id.is_condition_code != 0) { | ||
| 154 | throw NotImplementedException("Condition code emission"); | ||
| 155 | } | ||
| 156 | if (id.is_spill != 0) { | ||
| 157 | throw NotImplementedException("Spill emission"); | ||
| 158 | } | ||
| 159 | if constexpr (scalar) { | ||
| 160 | if (id.is_null != 0) { | ||
| 161 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); | ||
| 162 | } | ||
| 163 | if (id.is_long != 0) { | ||
| 164 | return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); | ||
| 165 | } else { | ||
| 166 | return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); | ||
| 167 | } | ||
| 168 | } else { | ||
| 169 | if (id.is_null != 0) { | ||
| 170 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); | ||
| 171 | } | ||
| 172 | if (id.is_long != 0) { | ||
| 173 | return fmt::format_to(ctx.out(), "D{}", id.index.Value()); | ||
| 174 | } else { | ||
| 175 | return fmt::format_to(ctx.out(), "R{}", id.index.Value()); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | } // namespace Shader::Backend::GLASM | ||
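The formatter specializations below all bottom out in FormatTo, whose naming scheme is: R<index> for 32-bit registers, D<index> for long (64-bit) ones, the RC/DC scratch registers for null definitions, and an .x suffix in scalar contexts. A standalone model of just that mapping:

    #include <cassert>
    #include <string>

    std::string Name(unsigned index, bool is_long, bool is_null, bool scalar) {
        const std::string base = is_null
                                     ? std::string{is_long ? "DC" : "RC"}
                                     : (is_long ? "D" : "R") + std::to_string(index);
        return scalar ? base + ".x" : base;
    }

    int main() {
        assert(Name(3, false, false, true) == "R3.x"); // scalar 32-bit use
        assert(Name(3, true, false, false) == "D3");   // vector 64-bit use
        assert(Name(0, false, true, true) == "RC.x");  // null definition fallback
    }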
| 181 | |||
| 182 | template <> | ||
| 183 | struct fmt::formatter<Shader::Backend::GLASM::Id> { | ||
| 184 | constexpr auto parse(format_parse_context& ctx) { | ||
| 185 | return ctx.begin(); | ||
| 186 | } | ||
| 187 | template <typename FormatContext> | ||
| 188 | auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { | ||
| 189 | return Shader::Backend::GLASM::FormatTo<true>(ctx, id); | ||
| 190 | } | ||
| 191 | }; | ||
| 192 | |||
| 193 | template <> | ||
| 194 | struct fmt::formatter<Shader::Backend::GLASM::Register> { | ||
| 195 | constexpr auto parse(format_parse_context& ctx) { | ||
| 196 | return ctx.begin(); | ||
| 197 | } | ||
| 198 | template <typename FormatContext> | ||
| 199 | auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { | ||
| 200 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 201 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 202 | } | ||
| 203 | return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | template <> | ||
| 208 | struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { | ||
| 209 | constexpr auto parse(format_parse_context& ctx) { | ||
| 210 | return ctx.begin(); | ||
| 211 | } | ||
| 212 | template <typename FormatContext> | ||
| 213 | auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { | ||
| 214 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 215 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 216 | } | ||
| 217 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 218 | } | ||
| 219 | }; | ||
| 220 | |||
| 221 | template <> | ||
| 222 | struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { | ||
| 223 | constexpr auto parse(format_parse_context& ctx) { | ||
| 224 | return ctx.begin(); | ||
| 225 | } | ||
| 226 | template <typename FormatContext> | ||
| 227 | auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { | ||
| 228 | switch (value.type) { | ||
| 229 | case Shader::Backend::GLASM::Type::Void: | ||
| 230 | break; | ||
| 231 | case Shader::Backend::GLASM::Type::Register: | ||
| 232 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 233 | case Shader::Backend::GLASM::Type::U32: | ||
| 234 | return fmt::format_to(ctx.out(), "{}", value.imm_u32); | ||
| 235 | case Shader::Backend::GLASM::Type::U64: | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 239 | } | ||
| 240 | }; | ||
| 241 | |||
| 242 | template <> | ||
| 243 | struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { | ||
| 244 | constexpr auto parse(format_parse_context& ctx) { | ||
| 245 | return ctx.begin(); | ||
| 246 | } | ||
| 247 | template <typename FormatContext> | ||
| 248 | auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { | ||
| 249 | switch (value.type) { | ||
| 250 | case Shader::Backend::GLASM::Type::Void: | ||
| 251 | break; | ||
| 252 | case Shader::Backend::GLASM::Type::Register: | ||
| 253 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 254 | case Shader::Backend::GLASM::Type::U32: | ||
| 255 | return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); | ||
| 256 | case Shader::Backend::GLASM::Type::U64: | ||
| 257 | break; | ||
| 258 | } | ||
| 259 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 260 | } | ||
| 261 | }; | ||
| 262 | |||
| 263 | template <> | ||
| 264 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { | ||
| 265 | constexpr auto parse(format_parse_context& ctx) { | ||
| 266 | return ctx.begin(); | ||
| 267 | } | ||
| 268 | template <typename FormatContext> | ||
| 269 | auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { | ||
| 270 | switch (value.type) { | ||
| 271 | case Shader::Backend::GLASM::Type::Void: | ||
| 272 | break; | ||
| 273 | case Shader::Backend::GLASM::Type::Register: | ||
| 274 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 275 | case Shader::Backend::GLASM::Type::U32: | ||
| 276 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); | ||
| 277 | case Shader::Backend::GLASM::Type::U64: | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 281 | } | ||
| 282 | }; | ||
| 283 | |||
| 284 | template <> | ||
| 285 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { | ||
| 286 | constexpr auto parse(format_parse_context& ctx) { | ||
| 287 | return ctx.begin(); | ||
| 288 | } | ||
| 289 | template <typename FormatContext> | ||
| 290 | auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { | ||
| 291 | switch (value.type) { | ||
| 292 | case Shader::Backend::GLASM::Type::Void: | ||
| 293 | break; | ||
| 294 | case Shader::Backend::GLASM::Type::Register: | ||
| 295 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 296 | case Shader::Backend::GLASM::Type::U32: | ||
| 297 | break; | ||
| 298 | case Shader::Backend::GLASM::Type::U64: | ||
| 299 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); | ||
| 300 | } | ||
| 301 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 302 | } | ||
| 303 | }; | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp new file mode 100644 index 000000000..4e6f2c0fe --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -0,0 +1,715 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/bindings.h" | ||
| 6 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | #include "shader_recompiler/runtime_info.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | u32 CbufIndex(size_t offset) { | ||
| 14 | return (offset / 4) % 4; | ||
| 15 | } | ||
| 16 | |||
| 17 | char Swizzle(size_t offset) { | ||
| 18 | return "xyzw"[CbufIndex(offset)]; | ||
| 19 | } | ||
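Constant-buffer reads reach this backend as byte offsets; each uniform slot is a 16-byte vec4, so dividing by 4 selects the 32-bit word and the modulo picks the component that Swizzle maps onto x/y/z/w. A quick check of the mapping:

    #include <cassert>
    #include <cstddef>

    constexpr std::size_t CbufIndex(std::size_t offset) {
        return (offset / 4) % 4;
    }

    int main() {
        constexpr const char* swizzle = "xyzw";
        static_assert(CbufIndex(0) == 0);  // byte 0  -> .x of the first vec4
        static_assert(CbufIndex(4) == 1);  // byte 4  -> .y
        static_assert(CbufIndex(28) == 3); // byte 28 -> .w of the second vec4
        assert(swizzle[CbufIndex(12)] == 'w');
    }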
| 20 | |||
| 21 | std::string_view InterpDecorator(Interpolation interp) { | ||
| 22 | switch (interp) { | ||
| 23 | case Interpolation::Smooth: | ||
| 24 | return ""; | ||
| 25 | case Interpolation::Flat: | ||
| 26 | return "flat "; | ||
| 27 | case Interpolation::NoPerspective: | ||
| 28 | return "noperspective "; | ||
| 29 | } | ||
| 30 | throw InvalidArgument("Invalid interpolation {}", interp); | ||
| 31 | } | ||
| 32 | |||
| 33 | std::string_view InputArrayDecorator(Stage stage) { | ||
| 34 | switch (stage) { | ||
| 35 | case Stage::Geometry: | ||
| 36 | case Stage::TessellationControl: | ||
| 37 | case Stage::TessellationEval: | ||
| 38 | return "[]"; | ||
| 39 | default: | ||
| 40 | return ""; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | bool StoresPerVertexAttributes(Stage stage) { | ||
| 45 | switch (stage) { | ||
| 46 | case Stage::VertexA: | ||
| 47 | case Stage::VertexB: | ||
| 48 | case Stage::Geometry: | ||
| 49 | case Stage::TessellationEval: | ||
| 50 | return true; | ||
| 51 | default: | ||
| 52 | return false; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | std::string OutputDecorator(Stage stage, u32 size) { | ||
| 57 | switch (stage) { | ||
| 58 | case Stage::TessellationControl: | ||
| 59 | return fmt::format("[{}]", size); | ||
| 60 | default: | ||
| 61 | return ""; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | std::string_view SamplerType(TextureType type, bool is_depth) { | ||
| 66 | if (is_depth) { | ||
| 67 | switch (type) { | ||
| 68 | case TextureType::Color1D: | ||
| 69 | return "sampler1DShadow"; | ||
| 70 | case TextureType::ColorArray1D: | ||
| 71 | return "sampler1DArrayShadow"; | ||
| 72 | case TextureType::Color2D: | ||
| 73 | return "sampler2DShadow"; | ||
| 74 | case TextureType::ColorArray2D: | ||
| 75 | return "sampler2DArrayShadow"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "samplerCubeShadow"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "samplerCubeArrayShadow"; | ||
| 80 | default: | ||
| 81 | throw NotImplementedException("Texture type: {}", type); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | switch (type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "sampler1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "sampler1DArray"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "sampler2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "sampler2DArray"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "sampler3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "samplerCube"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "samplerCubeArray"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "samplerBuffer"; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Texture type: {}", type); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string_view ImageType(TextureType type) { | ||
| 107 | switch (type) { | ||
| 108 | case TextureType::Color1D: | ||
| 109 | return "uimage1D"; | ||
| 110 | case TextureType::ColorArray1D: | ||
| 111 | return "uimage1DArray"; | ||
| 112 | case TextureType::Color2D: | ||
| 113 | return "uimage2D"; | ||
| 114 | case TextureType::ColorArray2D: | ||
| 115 | return "uimage2DArray"; | ||
| 116 | case TextureType::Color3D: | ||
| 117 | return "uimage3D"; | ||
| 118 | case TextureType::ColorCube: | ||
| 119 | return "uimageCube"; | ||
| 120 | case TextureType::ColorArrayCube: | ||
| 121 | return "uimageCubeArray"; | ||
| 122 | case TextureType::Buffer: | ||
| 123 | return "uimageBuffer"; | ||
| 124 | default: | ||
| 125 | throw NotImplementedException("Image type: {}", type); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | std::string_view ImageFormatString(ImageFormat format) { | ||
| 130 | switch (format) { | ||
| 131 | case ImageFormat::Typeless: | ||
| 132 | return ""; | ||
| 133 | case ImageFormat::R8_UINT: | ||
| 134 | return ",r8ui"; | ||
| 135 | case ImageFormat::R8_SINT: | ||
| 136 | return ",r8i"; | ||
| 137 | case ImageFormat::R16_UINT: | ||
| 138 | return ",r16ui"; | ||
| 139 | case ImageFormat::R16_SINT: | ||
| 140 | return ",r16i"; | ||
| 141 | case ImageFormat::R32_UINT: | ||
| 142 | return ",r32ui"; | ||
| 143 | case ImageFormat::R32G32_UINT: | ||
| 144 | return ",rg32ui"; | ||
| 145 | case ImageFormat::R32G32B32A32_UINT: | ||
| 146 | return ",rgba32ui"; | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Image format: {}", format); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | std::string_view ImageAccessQualifier(bool is_written, bool is_read) { | ||
| 153 | if (is_written && !is_read) { | ||
| 154 | return "writeonly "; | ||
| 155 | } | ||
| 156 | if (is_read && !is_written) { | ||
| 157 | return "readonly "; | ||
| 158 | } | ||
| 159 | return ""; | ||
| 160 | } | ||
| 161 | |||
| 162 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 163 | switch (primitive) { | ||
| 164 | case TessPrimitive::Triangles: | ||
| 165 | return "triangles"; | ||
| 166 | case TessPrimitive::Quads: | ||
| 167 | return "quads"; | ||
| 168 | case TessPrimitive::Isolines: | ||
| 169 | return "isolines"; | ||
| 170 | } | ||
| 171 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 172 | } | ||
| 173 | |||
| 174 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 175 | switch (spacing) { | ||
| 176 | case TessSpacing::Equal: | ||
| 177 | return "equal_spacing"; | ||
| 178 | case TessSpacing::FractionalOdd: | ||
| 179 | return "fractional_odd_spacing"; | ||
| 180 | case TessSpacing::FractionalEven: | ||
| 181 | return "fractional_even_spacing"; | ||
| 182 | } | ||
| 183 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 187 | switch (topology) { | ||
| 188 | case InputTopology::Points: | ||
| 189 | return "points"; | ||
| 190 | case InputTopology::Lines: | ||
| 191 | return "lines"; | ||
| 192 | case InputTopology::LinesAdjacency: | ||
| 193 | return "lines_adjacency"; | ||
| 194 | case InputTopology::Triangles: | ||
| 195 | return "triangles"; | ||
| 196 | case InputTopology::TrianglesAdjacency: | ||
| 197 | return "triangles_adjacency"; | ||
| 198 | } | ||
| 199 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 203 | switch (topology) { | ||
| 204 | case OutputTopology::PointList: | ||
| 205 | return "points"; | ||
| 206 | case OutputTopology::LineStrip: | ||
| 207 | return "line_strip"; | ||
| 208 | case OutputTopology::TriangleStrip: | ||
| 209 | return "triangle_strip"; | ||
| 210 | } | ||
| 211 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 212 | } | ||
| 213 | |||
| 214 | void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 215 | if (!ctx.info.stores.Legacy()) { | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | if (ctx.info.stores.FixedFunctionTexture()) { | ||
| 219 | header += "vec4 gl_TexCoord[8];"; | ||
| 220 | } | ||
| 221 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 222 | header += "vec4 gl_FrontColor;"; | ||
| 223 | } | ||
| 224 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { | ||
| 225 | header += "vec4 gl_FrontSecondaryColor;"; | ||
| 226 | } | ||
| 227 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { | ||
| 228 | header += "vec4 gl_BackColor;"; | ||
| 229 | } | ||
| 230 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { | ||
| 231 | header += "vec4 gl_BackSecondaryColor;"; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | void SetupOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 236 | if (!StoresPerVertexAttributes(ctx.stage)) { | ||
| 237 | return; | ||
| 238 | } | ||
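| | // Passthrough geometry shaders forward the input gl_PerVertex directly, so no output block is declared. | ||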
| 239 | if (ctx.uses_geometry_passthrough) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | header += "out gl_PerVertex{vec4 gl_Position;"; | ||
| 243 | if (ctx.info.stores[IR::Attribute::PointSize]) { | ||
| 244 | header += "float gl_PointSize;"; | ||
| 245 | } | ||
| 246 | if (ctx.info.stores.ClipDistances()) { | ||
| 247 | header += "float gl_ClipDistance[];"; | ||
| 248 | } | ||
| 249 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && | ||
| 250 | ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { | ||
| 251 | header += "int gl_ViewportIndex;"; | ||
| 252 | } | ||
| 253 | SetupLegacyOutPerVertex(ctx, header); | ||
| 254 | header += "};"; | ||
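| | // In geometry shaders, gl_ViewportIndex is declared as a separate built-in output rather than inside the block. | ||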
| 255 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { | ||
| 256 | header += "out int gl_ViewportIndex;"; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | void SetupInPerVertex(EmitContext& ctx, std::string& header) { | ||
| 261 | // Currently only required for TessellationControl to adhere to | ||
| 262 | // ARB_separate_shader_objects requirements | ||
| 263 | if (ctx.stage != Stage::TessellationControl) { | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; | ||
| 267 | const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; | ||
| 268 | const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; | ||
| 269 | const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; | ||
| 270 | if (!loads_per_vertex) { | ||
| 271 | return; | ||
| 272 | } | ||
| 273 | header += "in gl_PerVertex{"; | ||
| 274 | if (loads_position) { | ||
| 275 | header += "vec4 gl_Position;"; | ||
| 276 | } | ||
| 277 | if (loads_point_size) { | ||
| 278 | header += "float gl_PointSize;"; | ||
| 279 | } | ||
| 280 | if (loads_clip_distance) { | ||
| 281 | header += "float gl_ClipDistance[];"; | ||
| 282 | } | ||
| 283 | header += "}gl_in[gl_MaxPatchVertices];"; | ||
| 284 | } | ||
| 285 | |||
| 286 | void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { | ||
| 287 | if (!ctx.info.loads.Legacy()) { | ||
| 288 | return; | ||
| 289 | } | ||
| 290 | header += "in gl_PerFragment{"; | ||
| 291 | if (ctx.info.loads.FixedFunctionTexture()) { | ||
| 292 | header += "vec4 gl_TexCoord[8];"; | ||
| 293 | } | ||
| 294 | if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 295 | header += "vec4 gl_Color;"; | ||
| 296 | } | ||
| 297 | header += "};"; | ||
| 298 | } | ||
| 299 | |||
| 300 | } // Anonymous namespace | ||
| 301 | |||
| 302 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 303 | const RuntimeInfo& runtime_info_) | ||
| 304 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, | ||
| 305 | uses_geometry_passthrough{program.is_geometry_passthrough && | ||
| 306 | profile.support_geometry_shader_passthrough} { | ||
| 307 | if (profile.need_fastmath_off) { | ||
| 308 | header += "#pragma optionNV(fastmath off)\n"; | ||
| 309 | } | ||
| 310 | SetupExtensions(); | ||
| 311 | switch (program.stage) { | ||
| 312 | case Stage::VertexA: | ||
| 313 | case Stage::VertexB: | ||
| 314 | stage_name = "vs"; | ||
| 315 | break; | ||
| 316 | case Stage::TessellationControl: | ||
| 317 | stage_name = "tcs"; | ||
| 318 | header += fmt::format("layout(vertices={})out;", program.invocations); | ||
| 319 | break; | ||
| 320 | case Stage::TessellationEval: | ||
| 321 | stage_name = "tes"; | ||
| 322 | header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), | ||
| 323 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 324 | runtime_info.tess_clockwise ? "cw" : "ccw"); | ||
| 325 | break; | ||
| 326 | case Stage::Geometry: | ||
| 327 | stage_name = "gs"; | ||
| 328 | header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); | ||
| 329 | if (uses_geometry_passthrough) { | ||
| 330 | header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; | ||
| 331 | break; | ||
| 332 | } else if (program.is_geometry_passthrough && | ||
| 333 | !profile.support_geometry_shader_passthrough) { | ||
| 334 | LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); | ||
| 335 | } | ||
| 336 | header += fmt::format( | ||
| 337 | "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", | ||
| 338 | OutputPrimitive(program.output_topology), program.output_vertices); | ||
| 339 | break; | ||
| 340 | case Stage::Fragment: | ||
| 341 | stage_name = "fs"; | ||
| 342 | position_name = "gl_FragCoord"; | ||
| 343 | if (runtime_info.force_early_z) { | ||
| 344 | header += "layout(early_fragment_tests)in;"; | ||
| 345 | } | ||
| 346 | if (info.uses_sample_id) { | ||
| 347 | header += "in int gl_SampleID;"; | ||
| 348 | } | ||
| 349 | if (info.stores_sample_mask) { | ||
| 350 | header += "out int gl_SampleMask[];"; | ||
| 351 | } | ||
| 352 | break; | ||
| 353 | case Stage::Compute: | ||
| 354 | stage_name = "cs"; | ||
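| | // Clamp each workgroup dimension to at least 1 to keep the layout qualifier valid. | ||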
| 355 | const u32 local_x{std::max(program.workgroup_size[0], 1u)}; | ||
| 356 | const u32 local_y{std::max(program.workgroup_size[1], 1u)}; | ||
| 357 | const u32 local_z{std::max(program.workgroup_size[2], 1u)}; | ||
| 358 | header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", | ||
| 359 | local_x, local_y, local_z); | ||
| 360 | break; | ||
| 361 | } | ||
| 362 | SetupOutPerVertex(*this, header); | ||
| 363 | SetupInPerVertex(*this, header); | ||
| 364 | SetupLegacyInPerFragment(*this, header); | ||
| 365 | |||
| 366 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 367 | if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { | ||
| 368 | continue; | ||
| 369 | } | ||
| 370 | const auto qualifier{uses_geometry_passthrough ? "passthrough" | ||
| 371 | : fmt::format("location={}", index)}; | ||
| 372 | header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, | ||
| 373 | InterpDecorator(info.interpolation[index]), index, | ||
| 374 | InputArrayDecorator(stage)); | ||
| 375 | } | ||
| 376 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 377 | if (!info.uses_patches[index]) { | ||
| 378 | continue; | ||
| 379 | } | ||
| 380 | const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; | ||
| 381 | header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); | ||
| 382 | } | ||
| 383 | if (stage == Stage::Fragment) { | ||
| 384 | for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { | ||
| 385 | if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { | ||
| 386 | continue; | ||
| 387 | } | ||
| 388 | header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); | ||
| 389 | } | ||
| 390 | } | ||
| 391 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 392 | if (info.stores.Generic(index)) { | ||
| 393 | DefineGenericOutput(index, program.invocations); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | DefineConstantBuffers(bindings); | ||
| 397 | DefineStorageBuffers(bindings); | ||
| 398 | SetupImages(bindings); | ||
| 399 | SetupTextures(bindings); | ||
| 400 | DefineHelperFunctions(); | ||
| 401 | DefineConstants(); | ||
| 402 | } | ||
| 403 | |||
| 404 | void EmitContext::SetupExtensions() { | ||
| 405 | header += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||
| 406 | if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { | ||
| 407 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||
| 408 | } | ||
| 409 | if (info.uses_int64 && profile.support_int64) { | ||
| 410 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 411 | } | ||
| 412 | if (info.uses_int64_bit_atomics) { | ||
| 413 | header += "#extension GL_NV_shader_atomic_int64 : enable\n"; | ||
| 414 | } | ||
| 415 | if (info.uses_atomic_f32_add) { | ||
| 416 | header += "#extension GL_NV_shader_atomic_float : enable\n"; | ||
| 417 | } | ||
| 418 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 419 | header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; | ||
| 420 | } | ||
| 421 | if (info.uses_fp16) { | ||
| 422 | if (profile.support_gl_nv_gpu_shader_5) { | ||
| 423 | header += "#extension GL_NV_gpu_shader5 : enable\n"; | ||
| 424 | } | ||
| 425 | if (profile.support_gl_amd_gpu_shader_half_float) { | ||
| 426 | header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 430 | info.uses_subgroup_shuffles || info.uses_fswzadd) { | ||
| 431 | header += "#extension GL_ARB_shader_ballot : enable\n" | ||
| 432 | "#extension GL_ARB_shader_group_vote : enable\n"; | ||
| 433 | if (!info.uses_int64 && profile.support_int64) { | ||
| 434 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 435 | } | ||
| 436 | if (profile.support_gl_warp_intrinsics) { | ||
| 437 | header += "#extension GL_NV_shader_thread_shuffle : enable\n"; | ||
| 438 | } | ||
| 439 | } | ||
| 440 | if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && | ||
| 441 | profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { | ||
| 442 | header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 443 | } | ||
| 444 | if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { | ||
| 445 | header += "#extension GL_ARB_sparse_texture2 : enable\n"; | ||
| 446 | } | ||
| 447 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 448 | header += "#extension GL_NV_viewport_array2 : enable\n"; | ||
| 449 | } | ||
| 450 | if (info.uses_typeless_image_reads) { | ||
| 451 | header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; | ||
| 452 | } | ||
| 453 | if (info.uses_derivatives && profile.support_gl_derivative_control) { | ||
| 454 | header += "#extension GL_ARB_derivative_control : enable\n"; | ||
| 455 | } | ||
| 456 | if (uses_geometry_passthrough) { | ||
| 457 | header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | void EmitContext::DefineConstantBuffers(Bindings& bindings) { | ||
| 462 | if (info.constant_buffer_descriptors.empty()) { | ||
| 463 | return; | ||
| 464 | } | ||
| 465 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
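| | // Each constant buffer is declared as 4 * 1024 vec4s (64 KiB), assumed large enough for any guest buffer. | ||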
| 466 | header += fmt::format( | ||
| 467 | "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | ||
| 468 | bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); | ||
| 469 | bindings.uniform_buffer += desc.count; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | |||
| 473 | void EmitContext::DefineStorageBuffers(Bindings& bindings) { | ||
| 474 | if (info.storage_buffers_descriptors.empty()) { | ||
| 475 | return; | ||
| 476 | } | ||
| 477 | u32 index{}; | ||
| 478 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 479 | header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", | ||
| 480 | bindings.storage_buffer, stage_name, bindings.storage_buffer, | ||
| 481 | stage_name, index); | ||
| 482 | bindings.storage_buffer += desc.count; | ||
| 483 | index += desc.count; | ||
| 484 | } | ||
| 485 | } | ||
| 486 | |||
| 487 | void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { | ||
| 488 | static constexpr std::string_view swizzle{"xyzw"}; | ||
| 489 | const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 490 | u32 element{0}; | ||
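| | // Outputs are declared in component-sized chunks so each transform feedback varying gets its own declaration. | ||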
| 491 | while (element < 4) { | ||
| 492 | std::string definition{fmt::format("layout(location={}", index)}; | ||
| 493 | const u32 remainder{4 - element}; | ||
| 494 | const TransformFeedbackVarying* xfb_varying{}; | ||
| 495 | if (!runtime_info.xfb_varyings.empty()) { | ||
| 496 | xfb_varying = &runtime_info.xfb_varyings[base_index + element]; | ||
| 497 | xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; | ||
| 498 | } | ||
| 499 | const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; | ||
| 500 | if (element > 0) { | ||
| 501 | definition += fmt::format(",component={}", element); | ||
| 502 | } | ||
| 503 | if (xfb_varying) { | ||
| 504 | definition += | ||
| 505 | fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, | ||
| 506 | xfb_varying->stride, xfb_varying->offset); | ||
| 507 | } | ||
| 508 | std::string name{fmt::format("out_attr{}", index)}; | ||
| 509 | if (num_components < 4 || element > 0) { | ||
| 510 | name += fmt::format("_{}", swizzle.substr(element, num_components)); | ||
| 511 | } | ||
| 512 | const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; | ||
| 513 | definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); | ||
| 514 | header += definition; | ||
| 515 | |||
| 516 | const GenericElementInfo element_info{ | ||
| 517 | .name = name, | ||
| 518 | .first_element = element, | ||
| 519 | .num_components = num_components, | ||
| 520 | }; | ||
| 521 | std::fill_n(output_generics[index].begin() + element, num_components, element_info); | ||
| 522 | element += num_components; | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 526 | void EmitContext::DefineHelperFunctions() { | ||
| 527 | header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" | ||
| 528 | "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; | ||
| 529 | if (info.uses_global_increment || info.uses_shared_increment) { | ||
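| | // Increment that wraps back to 0 once the old value reaches op_b (atomic inc-with-wrap semantics). | ||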
| 530 | header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; | ||
| 531 | } | ||
| 532 | if (info.uses_global_decrement || info.uses_shared_decrement) { | ||
| 533 | header += "uint CasDecrement(uint op_a,uint op_b){" | ||
| 534 | "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; | ||
| 535 | } | ||
| 536 | if (info.uses_atomic_f32_add) { | ||
| 537 | header += "uint CasFloatAdd(uint op_a,float op_b){" | ||
| 538 | "return ftou(utof(op_a)+op_b);}"; | ||
| 539 | } | ||
| 540 | if (info.uses_atomic_f32x2_add) { | ||
| 541 | header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" | ||
| 542 | "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; | ||
| 543 | } | ||
| 544 | if (info.uses_atomic_f32x2_min) { | ||
| 545 | header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " | ||
| 546 | "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; | ||
| 547 | } | ||
| 548 | if (info.uses_atomic_f32x2_max) { | ||
| 549 | header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " | ||
| 550 | "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; | ||
| 551 | } | ||
| 552 | if (info.uses_atomic_f16x2_add) { | ||
| 553 | header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " | ||
| 554 | "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; | ||
| 555 | } | ||
| 556 | if (info.uses_atomic_f16x2_min) { | ||
| 557 | header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " | ||
| 558 | "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; | ||
| 559 | } | ||
| 560 | if (info.uses_atomic_f16x2_max) { | ||
| 561 | header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " | ||
| 562 | "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; | ||
| 563 | } | ||
| 564 | if (info.uses_atomic_s32_min) { | ||
| 565 | header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | ||
| 566 | } | ||
| 567 | if (info.uses_atomic_s32_max) { | ||
| 568 | header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; | ||
| 569 | } | ||
| 570 | if (info.uses_global_memory && profile.support_int64) { | ||
| 571 | header += DefineGlobalMemoryFunctions(); | ||
| 572 | } | ||
| 573 | if (info.loads_indexed_attributes) { | ||
| 574 | const bool is_array{stage == Stage::Geometry}; | ||
| 575 | const auto vertex_arg{is_array ? ",uint vertex" : ""}; | ||
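| | // The byte offset is decoded as: offset>>2 selects a 32-bit component, and base_index>>2 selects the vec4 attribute. | ||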
| 576 | std::string func{ | ||
| 577 | fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " | ||
| 578 | "masked_index=uint(base_index)&3u;switch(base_index>>2){{", | ||
| 579 | vertex_arg)}; | ||
| 580 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 581 | const auto position_idx{is_array ? "gl_in[vertex]." : ""}; | ||
| 582 | func += fmt::format("case {}:return {}{}[masked_index];", | ||
| 583 | static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, | ||
| 584 | position_name); | ||
| 585 | } | ||
| 586 | const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||
| 587 | for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 588 | if (!info.loads.Generic(index)) { | ||
| 589 | continue; | ||
| 590 | } | ||
| 591 | const auto vertex_idx{is_array ? "[vertex]" : ""}; | ||
| 592 | func += fmt::format("case {}:return in_attr{}{}[masked_index];", | ||
| 593 | base_attribute_value + index, index, vertex_idx); | ||
| 594 | } | ||
| 595 | func += "default: return 0.0;}}"; | ||
| 596 | header += func; | ||
| 597 | } | ||
| 598 | if (info.stores_indexed_attributes) { | ||
| 599 | // TODO | ||
| 600 | } | ||
| 601 | } | ||
| 602 | |||
| 603 | std::string EmitContext::DefineGlobalMemoryFunctions() { | ||
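| | // Global memory is emulated: each helper range-checks the 64-bit address against every NVN storage | ||
| | // buffer's base address and size (read from the constant buffer) and falls through on a miss. | ||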
| 604 | const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { | ||
| 605 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
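| | // The buffer size is assumed to live 8 bytes after the 64-bit base address in the constant buffer. | ||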
| 606 | const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; | ||
| 607 | const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; | ||
| 608 | const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; | ||
| 609 | std::array<std::string, 2> addr_xy; | ||
| 610 | std::array<std::string, 2> size_xy; | ||
| 611 | for (size_t i = 0; i < addr_xy.size(); ++i) { | ||
| 612 | const auto addr_loc{ssbo.cbuf_offset + 4 * i}; | ||
| 613 | const auto size_loc{size_cbuf_offset + 4 * i}; | ||
| 614 | addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); | ||
| 615 | size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); | ||
| 616 | } | ||
| 617 | const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; | ||
| 618 | const auto addr_statement{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; | ||
| 619 | func += addr_statement; | ||
| 620 | |||
| 621 | const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; | ||
| 622 | const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; | ||
| 623 | const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; | ||
| 624 | const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; | ||
| 625 | func += comparison; | ||
| 626 | |||
| 627 | const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; | ||
| 628 | func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); | ||
| 629 | }}; | ||
| 630 | std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; | ||
| 631 | std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; | ||
| 632 | std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; | ||
| 633 | std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; | ||
| 634 | std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; | ||
| 635 | std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; | ||
| 636 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 637 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 638 | if (!info.nvn_buffer_used[index]) { | ||
| 639 | continue; | ||
| 640 | } | ||
| 641 | define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); | ||
| 642 | define_body(write_func_64, index, | ||
| 643 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); | ||
| 644 | define_body(write_func_128, index, | ||
| 645 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" | ||
| 646 | "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); | ||
| 647 | define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); | ||
| 648 | define_body(load_func_64, index, | ||
| 649 | "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); | ||
| 650 | define_body(load_func_128, index, | ||
| 651 | "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" | ||
| 652 | "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); | ||
| 653 | } | ||
| 654 | write_func += '}'; | ||
| 655 | write_func_64 += '}'; | ||
| 656 | write_func_128 += '}'; | ||
| 657 | load_func += "return 0u;}"; | ||
| 658 | load_func_64 += "return uvec2(0);}"; | ||
| 659 | load_func_128 += "return uvec4(0);}"; | ||
| 660 | return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; | ||
| 661 | } | ||
| 662 | |||
| 663 | void EmitContext::SetupImages(Bindings& bindings) { | ||
| 664 | image_buffers.reserve(info.image_buffer_descriptors.size()); | ||
| 665 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 666 | image_buffers.push_back({bindings.image, desc.count}); | ||
| 667 | const auto format{ImageFormatString(desc.format)}; | ||
| 668 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 669 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 670 | header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", | ||
| 671 | bindings.image, format, qualifier, bindings.image, array_decorator); | ||
| 672 | bindings.image += desc.count; | ||
| 673 | } | ||
| 674 | images.reserve(info.image_descriptors.size()); | ||
| 675 | for (const auto& desc : info.image_descriptors) { | ||
| 676 | images.push_back({bindings.image, desc.count}); | ||
| 677 | const auto format{ImageFormatString(desc.format)}; | ||
| 678 | const auto image_type{ImageType(desc.type)}; | ||
| 679 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 680 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 681 | header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, | ||
| 682 | qualifier, image_type, bindings.image, array_decorator); | ||
| 683 | bindings.image += desc.count; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | |||
| 687 | void EmitContext::SetupTextures(Bindings& bindings) { | ||
| 688 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | ||
| 689 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 690 | texture_buffers.push_back({bindings.texture, desc.count}); | ||
| 691 | const auto sampler_type{SamplerType(TextureType::Buffer, false)}; | ||
| 692 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 693 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 694 | sampler_type, bindings.texture, array_decorator); | ||
| 695 | bindings.texture += desc.count; | ||
| 696 | } | ||
| 697 | textures.reserve(info.texture_descriptors.size()); | ||
| 698 | for (const auto& desc : info.texture_descriptors) { | ||
| 699 | textures.push_back({bindings.texture, desc.count}); | ||
| 700 | const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; | ||
| 701 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 702 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 703 | sampler_type, bindings.texture, array_decorator); | ||
| 704 | bindings.texture += desc.count; | ||
| 705 | } | ||
| 706 | } | ||
| 707 | |||
| 708 | void EmitContext::DefineConstants() { | ||
| 709 | if (info.uses_fswzadd) { | ||
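| | // Per-lane sign constants consumed by the FSWZADD emulation. | ||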
| 710 | header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" | ||
| 711 | "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; | ||
| 712 | } | ||
| 713 | } | ||
| 714 | |||
| 715 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h new file mode 100644 index 000000000..d9b639d29 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_context.h | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/backend/glsl/var_alloc.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader { | ||
| 17 | struct Info; | ||
| 18 | struct Profile; | ||
| 19 | struct RuntimeInfo; | ||
| 20 | } // namespace Shader | ||
| 21 | |||
| 22 | namespace Shader::Backend { | ||
| 23 | struct Bindings; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace Shader::IR { | ||
| 27 | class Inst; | ||
| 28 | struct Program; | ||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | namespace Shader::Backend::GLSL { | ||
| 32 | |||
| 33 | struct GenericElementInfo { | ||
| 34 | std::string name; | ||
| 35 | u32 first_element{}; | ||
| 36 | u32 num_components{}; | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct TextureImageDefinition { | ||
| 40 | u32 binding; | ||
| 41 | u32 count; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class EmitContext { | ||
| 45 | public: | ||
| 46 | explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 47 | const RuntimeInfo& runtime_info_); | ||
| 48 | |||
| 49 | template <GlslVarType type, typename... Args> | ||
| 50 | void Add(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 51 | const auto var_def{var_alloc.AddDefine(inst, type)}; | ||
| 52 | if (var_def.empty()) { | ||
| 53 | // Skip the "{}=" assignment prefix (3 chars) when the result is unused. | ||
| 54 | code += fmt::format(fmt::runtime(format_str + 3), std::forward<Args>(args)...); | ||
| 55 | } else { | ||
| 56 | code += fmt::format(fmt::runtime(format_str), var_def, std::forward<Args>(args)...); | ||
| 57 | } | ||
| 58 | // TODO: Remove this | ||
| 59 | code += '\n'; | ||
| 60 | } | ||
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | void AddU1(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 64 | Add<GlslVarType::U1>(format_str, inst, args...); | ||
| 65 | } | ||
| 66 | |||
| 67 | template <typename... Args> | ||
| 68 | void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 69 | Add<GlslVarType::F16x2>(format_str, inst, args...); | ||
| 70 | } | ||
| 71 | |||
| 72 | template <typename... Args> | ||
| 73 | void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 74 | Add<GlslVarType::U32>(format_str, inst, args...); | ||
| 75 | } | ||
| 76 | |||
| 77 | template <typename... Args> | ||
| 78 | void AddF32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 79 | Add<GlslVarType::F32>(format_str, inst, args...); | ||
| 80 | } | ||
| 81 | |||
| 82 | template <typename... Args> | ||
| 83 | void AddU64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 84 | Add<GlslVarType::U64>(format_str, inst, args...); | ||
| 85 | } | ||
| 86 | |||
| 87 | template <typename... Args> | ||
| 88 | void AddF64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 89 | Add<GlslVarType::F64>(format_str, inst, args...); | ||
| 90 | } | ||
| 91 | |||
| 92 | template <typename... Args> | ||
| 93 | void AddU32x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 94 | Add<GlslVarType::U32x2>(format_str, inst, args...); | ||
| 95 | } | ||
| 96 | |||
| 97 | template <typename... Args> | ||
| 98 | void AddF32x2(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 99 | Add<GlslVarType::F32x2>(format_str, inst, args...); | ||
| 100 | } | ||
| 101 | |||
| 102 | template <typename... Args> | ||
| 103 | void AddU32x3(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 104 | Add<GlslVarType::U32x3>(format_str, inst, args...); | ||
| 105 | } | ||
| 106 | |||
| 107 | template <typename... Args> | ||
| 108 | void AddF32x3(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 109 | Add<GlslVarType::F32x3>(format_str, inst, args...); | ||
| 110 | } | ||
| 111 | |||
| 112 | template <typename... Args> | ||
| 113 | void AddU32x4(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 114 | Add<GlslVarType::U32x4>(format_str, inst, args...); | ||
| 115 | } | ||
| 116 | |||
| 117 | template <typename... Args> | ||
| 118 | void AddF32x4(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 119 | Add<GlslVarType::F32x4>(format_str, inst, args...); | ||
| 120 | } | ||
| 121 | |||
| 122 | template <typename... Args> | ||
| 123 | void AddPrecF32(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 124 | Add<GlslVarType::PrecF32>(format_str, inst, args...); | ||
| 125 | } | ||
| 126 | |||
| 127 | template <typename... Args> | ||
| 128 | void AddPrecF64(const char* format_str, IR::Inst& inst, Args&&... args) { | ||
| 129 | Add<GlslVarType::PrecF64>(format_str, inst, args...); | ||
| 130 | } | ||
| 131 | |||
| 132 | template <typename... Args> | ||
| 133 | void Add(const char* format_str, Args&&... args) { | ||
| 134 | code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...); | ||
| 135 | // TODO: Remove this | ||
| 136 | code += '\n'; | ||
| 137 | } | ||
| 138 | |||
| 139 | std::string header; | ||
| 140 | std::string code; | ||
| 141 | VarAlloc var_alloc; | ||
| 142 | const Info& info; | ||
| 143 | const Profile& profile; | ||
| 144 | const RuntimeInfo& runtime_info; | ||
| 145 | |||
| 146 | Stage stage{}; | ||
| 147 | std::string_view stage_name = "invalid"; | ||
| 148 | std::string_view position_name = "gl_Position"; | ||
| 149 | |||
| 150 | std::vector<TextureImageDefinition> texture_buffers; | ||
| 151 | std::vector<TextureImageDefinition> image_buffers; | ||
| 152 | std::vector<TextureImageDefinition> textures; | ||
| 153 | std::vector<TextureImageDefinition> images; | ||
| 154 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | ||
| 155 | |||
| 156 | u32 num_safety_loop_vars{}; | ||
| 157 | |||
| 158 | bool uses_y_direction{}; | ||
| 159 | bool uses_cc_carry{}; | ||
| 160 | bool uses_geometry_passthrough{}; | ||
| 161 | |||
| 162 | private: | ||
| 163 | void SetupExtensions(); | ||
| 164 | void DefineConstantBuffers(Bindings& bindings); | ||
| 165 | void DefineStorageBuffers(Bindings& bindings); | ||
| 166 | void DefineGenericOutput(size_t index, u32 invocations); | ||
| 167 | void DefineHelperFunctions(); | ||
| 168 | void DefineConstants(); | ||
| 169 | std::string DefineGlobalMemoryFunctions(); | ||
| 170 | void SetupImages(Bindings& bindings); | ||
| 171 | void SetupTextures(Bindings& bindings); | ||
| 172 | }; | ||
| 173 | |||
| 174 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp new file mode 100644 index 000000000..8a430d573 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp | |||
| @@ -0,0 +1,252 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "common/div_ceil.h" | ||
| 11 | #include "common/settings.h" | ||
| 12 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 13 | #include "shader_recompiler/backend/glsl/emit_glsl.h" | ||
| 14 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::GLSL { | ||
| 18 | namespace { | ||
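| | // Compile-time traits used to unpack IR instruction arguments into the typed parameters of each Emit* function. | ||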
| 19 | template <class Func> | ||
| 20 | struct FuncTraits {}; | ||
| 21 | |||
| 22 | template <class ReturnType_, class... Args> | ||
| 23 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 24 | using ReturnType = ReturnType_; | ||
| 25 | |||
| 26 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 27 | |||
| 28 | template <size_t I> | ||
| 29 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 30 | }; | ||
| 31 | |||
| 32 | template <auto func, typename... Args> | ||
| 33 | void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { | ||
| 34 | inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); | ||
| 35 | } | ||
| 36 | |||
| 37 | template <typename ArgType> | ||
| 38 | auto Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 39 | if constexpr (std::is_same_v<ArgType, std::string_view>) { | ||
| 40 | return ctx.var_alloc.Consume(arg); | ||
| 41 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 42 | return arg; | ||
| 43 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 44 | return arg.U32(); | ||
| 45 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 46 | return arg.Attribute(); | ||
| 47 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 48 | return arg.Patch(); | ||
| 49 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 50 | return arg.Reg(); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 55 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 56 | using Traits = FuncTraits<decltype(func)>; | ||
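| | // Argument indices are offset by one (or two when the function also takes IR::Inst&) to skip the leading EmitContext&. | ||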
| 57 | if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { | ||
| 58 | if constexpr (is_first_arg_inst) { | ||
| 59 | SetDefinition<func>( | ||
| 60 | ctx, inst, *inst, | ||
| 61 | Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 62 | } else { | ||
| 63 | SetDefinition<func>( | ||
| 64 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 65 | } | ||
| 66 | } else { | ||
| 67 | if constexpr (is_first_arg_inst) { | ||
| 68 | func(ctx, *inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 69 | } else { | ||
| 70 | func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | template <auto func> | ||
| 76 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 77 | using Traits = FuncTraits<decltype(func)>; | ||
| 78 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 79 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 80 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 81 | } else { | ||
| 82 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 83 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>; | ||
| 84 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 85 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 90 | switch (inst->GetOpcode()) { | ||
| 91 | #define OPCODE(name, result_type, ...) \ | ||
| 92 | case IR::Opcode::name: \ | ||
| 93 | return Invoke<&Emit##name>(ctx, inst); | ||
| 94 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 95 | #undef OPCODE | ||
| 96 | } | ||
| 97 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 98 | } | ||
| 99 | |||
| 100 | bool IsReference(IR::Inst& inst) { | ||
| 101 | return inst.GetOpcode() == IR::Opcode::Reference; | ||
| 102 | } | ||
| 103 | |||
| 104 | void PrecolorInst(IR::Inst& phi) { | ||
| 105 | // Insert phi moves before references to avoid overwriting other phis | ||
| 106 | const size_t num_args{phi.NumArgs()}; | ||
| 107 | for (size_t i = 0; i < num_args; ++i) { | ||
| 108 | IR::Block& phi_block{*phi.PhiBlock(i)}; | ||
| 109 | auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; | ||
| 110 | IR::IREmitter ir{phi_block, it}; | ||
| 111 | const IR::Value arg{phi.Arg(i)}; | ||
| 112 | if (arg.IsImmediate()) { | ||
| 113 | ir.PhiMove(phi, arg); | ||
| 114 | } else { | ||
| 115 | ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | for (size_t i = 0; i < num_args; ++i) { | ||
| 119 | IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void Precolor(const IR::Program& program) { | ||
| 124 | for (IR::Block* const block : program.blocks) { | ||
| 125 | for (IR::Inst& phi : block->Instructions()) { | ||
| 126 | if (!IR::IsPhi(phi)) { | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | PrecolorInst(phi); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||
| 135 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 136 | switch (node.type) { | ||
| 137 | case IR::AbstractSyntaxNode::Type::Block: | ||
| 138 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 139 | EmitInst(ctx, &inst); | ||
| 140 | } | ||
| 141 | break; | ||
| 142 | case IR::AbstractSyntaxNode::Type::If: | ||
| 143 | ctx.Add("if({}){{", ctx.var_alloc.Consume(node.data.if_node.cond)); | ||
| 144 | break; | ||
| 145 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 146 | ctx.Add("}}"); | ||
| 147 | break; | ||
| 148 | case IR::AbstractSyntaxNode::Type::Break: | ||
| 149 | if (node.data.break_node.cond.IsImmediate()) { | ||
| 150 | if (node.data.break_node.cond.U1()) { | ||
| 151 | ctx.Add("break;"); | ||
| 152 | } | ||
| 153 | } else { | ||
| 154 | ctx.Add("if({}){{break;}}", ctx.var_alloc.Consume(node.data.break_node.cond)); | ||
| 155 | } | ||
| 156 | break; | ||
| 157 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 158 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 159 | ctx.Add("return;"); | ||
| 160 | break; | ||
| 161 | case IR::AbstractSyntaxNode::Type::Loop: | ||
| 162 | ctx.Add("for(;;){{"); | ||
| 163 | break; | ||
| 164 | case IR::AbstractSyntaxNode::Type::Repeat: | ||
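| | // Safety counters (initialized to 0x2000 in DefineVariables) break out of runaway loops. | ||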
| 165 | if (Settings::values.disable_shader_loop_safety_checks) { | ||
| 166 | ctx.Add("if(!{}){{break;}}}}", ctx.var_alloc.Consume(node.data.repeat.cond)); | ||
| 167 | } else { | ||
| 168 | ctx.Add("if(--loop{}<0 || !{}){{break;}}}}", ctx.num_safety_loop_vars++, | ||
| 169 | ctx.var_alloc.Consume(node.data.repeat.cond)); | ||
| 170 | } | ||
| 171 | break; | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("AbstractSyntaxNode Type {}", node.type); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | std::string GlslVersionSpecifier(const EmitContext& ctx) { | ||
| 179 | if (ctx.uses_y_direction || ctx.info.stores.Legacy() || ctx.info.loads.Legacy()) { | ||
| 180 | return " compatibility"; | ||
| 181 | } | ||
| 182 | return ""; | ||
| 183 | } | ||
| 184 | |||
| 185 | bool IsPreciseType(GlslVarType type) { | ||
| 186 | switch (type) { | ||
| 187 | case GlslVarType::PrecF32: | ||
| 188 | case GlslVarType::PrecF64: | ||
| 189 | return true; | ||
| 190 | default: | ||
| 191 | return false; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | void DefineVariables(const EmitContext& ctx, std::string& header) { | ||
| 196 | for (u32 i = 0; i < static_cast<u32>(GlslVarType::Void); ++i) { | ||
| 197 | const auto type{static_cast<GlslVarType>(i)}; | ||
| 198 | const auto& tracker{ctx.var_alloc.GetUseTracker(type)}; | ||
| 199 | const auto type_name{ctx.var_alloc.GetGlslType(type)}; | ||
| 200 | const bool has_precise_bug{ctx.stage == Stage::Fragment && ctx.profile.has_gl_precise_bug}; | ||
| 201 | const auto precise{!has_precise_bug && IsPreciseType(type) ? "precise " : ""}; | ||
| 202 | // Temps/return types that are never used are stored at index 0 | ||
| 203 | if (tracker.uses_temp) { | ||
| 204 | header += fmt::format("{}{} t{}={}(0);", precise, type_name, | ||
| 205 | ctx.var_alloc.Representation(0, type), type_name); | ||
| 206 | } | ||
| 207 | for (u32 index = 0; index < tracker.num_used; ++index) { | ||
| 208 | header += fmt::format("{}{} {}={}(0);", precise, type_name, | ||
| 209 | ctx.var_alloc.Representation(index, type), type_name); | ||
| 210 | } | ||
| 211 | } | ||
| 212 | for (u32 i = 0; i < ctx.num_safety_loop_vars; ++i) { | ||
| 213 | header += fmt::format("int loop{}=0x2000;", i); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } // Anonymous namespace | ||
| 217 | |||
| 218 | std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program, | ||
| 219 | Bindings& bindings) { | ||
| 220 | EmitContext ctx{program, bindings, profile, runtime_info}; | ||
| 221 | Precolor(program); | ||
| 222 | EmitCode(ctx, program); | ||
| 223 | const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; | ||
| 224 | ctx.header.insert(0, version); | ||
| 225 | if (program.shared_memory_size > 0) { | ||
| 226 | const auto requested_size{program.shared_memory_size}; | ||
| 227 | const auto max_size{profile.gl_max_compute_smem_size}; | ||
| 228 | const bool needs_clamp{requested_size > max_size}; | ||
| 229 | if (needs_clamp) { | ||
| 230 | LOG_WARNING(Shader_GLSL, "Requested shared memory size ({}) exceeds device limit ({})", | ||
| 231 | requested_size, max_size); | ||
| 232 | } | ||
| 233 | const auto smem_size{needs_clamp ? max_size : requested_size}; | ||
| 234 | ctx.header += fmt::format("shared uint smem[{}];", Common::DivCeil(smem_size, 4U)); | ||
| 235 | } | ||
| 236 | ctx.header += "void main(){\n"; | ||
| 237 | if (program.local_memory_size > 0) { | ||
| 238 | ctx.header += fmt::format("uint lmem[{}];", Common::DivCeil(program.local_memory_size, 4U)); | ||
| 239 | } | ||
| 240 | DefineVariables(ctx, ctx.header); | ||
| 241 | if (ctx.uses_cc_carry) { | ||
| 242 | ctx.header += "uint carry;"; | ||
| 243 | } | ||
| 244 | if (program.info.uses_subgroup_shuffles) { | ||
| 245 | ctx.header += "bool shfl_in_bounds;"; | ||
| 246 | } | ||
| 247 | ctx.code.insert(0, ctx.header); | ||
| 248 | ctx.code += '}'; | ||
| 249 | return ctx.code; | ||
| 250 | } | ||
| 251 | |||
| 252 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.h b/src/shader_recompiler/backend/glsl/emit_glsl.h new file mode 100644 index 000000000..20e5719e6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl.h | |||
| @@ -0,0 +1,24 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/bindings.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | #include "shader_recompiler/runtime_info.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLSL { | ||
| 15 | |||
| 16 | [[nodiscard]] std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 17 | IR::Program& program, Bindings& bindings); | ||
| 18 | |||
| 19 | [[nodiscard]] inline std::string EmitGLSL(const Profile& profile, IR::Program& program) { | ||
| 20 | Bindings binding; | ||
| 21 | return EmitGLSL(profile, {}, program, binding); | ||
| 22 | } | ||
| 23 | |||
| 24 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp new file mode 100644 index 000000000..772acc5a4 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp | |||
| @@ -0,0 +1,418 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
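| | // Format string emulating a read-modify-write atomic with an atomicCompSwap retry loop. | ||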
| 13 | constexpr char cas_loop[]{ | ||
| 14 | "for (;;){{uint old={};{}=atomicCompSwap({},old,{}({},{}));if({}==old){{break;}}}}"}; | ||
| 15 | |||
| 16 | void SharedCasFunction(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 17 | std::string_view value, std::string_view function) { | ||
| 18 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 19 | const std::string smem{fmt::format("smem[{}>>2]", offset)}; | ||
| 20 | ctx.Add(cas_loop, smem, ret, smem, function, smem, value, ret); | ||
| 21 | } | ||
| 22 | |||
| 23 | void SsboCasFunction(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 24 | const IR::Value& offset, std::string_view value, std::string_view function) { | ||
| 25 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 26 | const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), | ||
| 27 | ctx.var_alloc.Consume(offset))}; | ||
| 28 | ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); | ||
| 29 | } | ||
| 30 | |||
| 31 | void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 32 | const IR::Value& offset, std::string_view value, | ||
| 33 | std::string_view function) { | ||
| 34 | const std::string ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), | ||
| 35 | ctx.var_alloc.Consume(offset))}; | ||
| 36 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 37 | ctx.Add(cas_loop, ssbo, ret, ssbo, function, ssbo, value, ret); | ||
| 38 | ctx.AddF32("{}=utof({});", inst, ret); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 43 | std::string_view value) { | ||
| 44 | ctx.AddU32("{}=atomicAdd(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 48 | std::string_view value) { | ||
| 49 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 50 | SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMinS32"); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 54 | std::string_view value) { | ||
| 55 | ctx.AddU32("{}=atomicMin(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 59 | std::string_view value) { | ||
| 60 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 61 | SharedCasFunction(ctx, inst, pointer_offset, u32_value, "CasMaxS32"); | ||
| 62 | } | ||
| 63 | |||
| 64 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 65 | std::string_view value) { | ||
| 66 | ctx.AddU32("{}=atomicMax(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 70 | std::string_view value) { | ||
| 71 | SharedCasFunction(ctx, inst, pointer_offset, value, "CasIncrement"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 75 | std::string_view value) { | ||
| 76 | SharedCasFunction(ctx, inst, pointer_offset, value, "CasDecrement"); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 80 | std::string_view value) { | ||
| 81 | ctx.AddU32("{}=atomicAnd(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 82 | } | ||
| 83 | |||
| 84 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 85 | std::string_view value) { | ||
| 86 | ctx.AddU32("{}=atomicOr(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 90 | std::string_view value) { | ||
| 91 | ctx.AddU32("{}=atomicXor(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 95 | std::string_view value) { | ||
| 96 | ctx.AddU32("{}=atomicExchange(smem[{}>>2],{});", inst, pointer_offset, value); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 100 | std::string_view value) { | ||
| 101 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 102 | ctx.AddU64("{}=packUint2x32(uvec2(smem[{}>>2],smem[({}+4)>>2]));", inst, pointer_offset, | ||
| 103 | pointer_offset); | ||
| 104 | ctx.Add("smem[{}>>2]=unpackUint2x32({}).x;smem[({}+4)>>2]=unpackUint2x32({}).y;", | ||
| 105 | pointer_offset, value, pointer_offset, value); | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | const IR::Value& offset, std::string_view value) { | ||
| 110 | ctx.AddU32("{}=atomicAdd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 111 | ctx.var_alloc.Consume(offset), value); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 115 | const IR::Value& offset, std::string_view value) { | ||
| 116 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 117 | SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMinS32"); | ||
| 118 | } | ||
| 119 | |||
| 120 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 121 | const IR::Value& offset, std::string_view value) { | ||
| 122 | ctx.AddU32("{}=atomicMin({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 123 | ctx.var_alloc.Consume(offset), value); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 127 | const IR::Value& offset, std::string_view value) { | ||
| 128 | const std::string u32_value{fmt::format("uint({})", value)}; | ||
| 129 | SsboCasFunction(ctx, inst, binding, offset, u32_value, "CasMaxS32"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 133 | const IR::Value& offset, std::string_view value) { | ||
| 134 | ctx.AddU32("{}=atomicMax({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 135 | ctx.var_alloc.Consume(offset), value); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 139 | const IR::Value& offset, std::string_view value) { | ||
| 140 | SsboCasFunction(ctx, inst, binding, offset, value, "CasIncrement"); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 144 | const IR::Value& offset, std::string_view value) { | ||
| 145 | SsboCasFunction(ctx, inst, binding, offset, value, "CasDecrement"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 149 | const IR::Value& offset, std::string_view value) { | ||
| 150 | ctx.AddU32("{}=atomicAnd({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 151 | ctx.var_alloc.Consume(offset), value); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 155 | const IR::Value& offset, std::string_view value) { | ||
| 156 | ctx.AddU32("{}=atomicOr({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 157 | ctx.var_alloc.Consume(offset), value); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 161 | const IR::Value& offset, std::string_view value) { | ||
| 162 | ctx.AddU32("{}=atomicXor({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 163 | ctx.var_alloc.Consume(offset), value); | ||
| 164 | } | ||
| 165 | |||
| 166 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 167 | const IR::Value& offset, std::string_view value) { | ||
| 168 | ctx.AddU32("{}=atomicExchange({}_ssbo{}[{}>>2],{});", inst, ctx.stage_name, binding.U32(), | ||
| 169 | ctx.var_alloc.Consume(offset), value); | ||
| 170 | } | ||
| 171 | |||
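// The 64-bit storage atomics below have no GLSL equivalent, so they are
// lowered to plain 32-bit SSBO accesses. As a rough sketch with
// illustrative names (compute stage, binding 0, offset variable "off"),
// the IAdd64 case expands to GLSL along these lines:
//   r=packUint2x32(uvec2(cs_ssbo0[off>>2],cs_ssbo0[(off>>2)+1]));
//   cs_ssbo0[off>>2]+=unpackUint2x32(v).x;
//   cs_ssbo0[(off>>2)+1]+=unpackUint2x32(v).y;
// The old value is read back first and each half is then updated
// non-atomically, hence the warnings these emitters log.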
| 172 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 173 | const IR::Value& offset, std::string_view value) { | ||
| 174 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic"); | ||
| 175 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 176 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 177 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 178 | ctx.Add("{}_ssbo{}[{}>>2]+=unpackUint2x32({}).x;{}_ssbo{}[({}>>2)+1]+=unpackUint2x32({}).y;", | ||
| 179 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 180 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 184 | const IR::Value& offset, std::string_view value) { | ||
| 185 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic"); | ||
| 186 | ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 187 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 188 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 189 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 190 | "{}_ssbo{}[({}>>2)+i]=uint(min(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" | ||
| 191 | ");}}", | ||
| 192 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 193 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 194 | } | ||
| 195 | |||
| 196 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 197 | const IR::Value& offset, std::string_view value) { | ||
| 198 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic"); | ||
| 199 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 200 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 201 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 202 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 203 | "{}_ssbo{}[({}>>2)+i]=min({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}))[i]);}}", | ||
| 204 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 205 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 209 | const IR::Value& offset, std::string_view value) { | ||
| 210 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic"); | ||
| 211 | ctx.AddU64("{}=packInt2x32(ivec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 212 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 213 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 214 | ctx.Add("for(int i=0;i<2;++i){{ " | ||
| 215 | "{}_ssbo{}[({}>>2)+i]=uint(max(int({}_ssbo{}[({}>>2)+i]),unpackInt2x32(int64_t({}))[i])" | ||
| 216 | ");}}", | ||
| 217 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 218 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 222 | const IR::Value& offset, std::string_view value) { | ||
| 223 | LOG_WARNING(Shader_GLSL, "Int64 atomics not supported, falling back to non-atomic"); | ||
| 224 | ctx.AddU64("{}=packUint2x32(uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}>>2)+1]));", inst, | ||
| 225 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 226 | binding.U32(), ctx.var_alloc.Consume(offset)); | ||
| 227 | ctx.Add("for(int " | ||
| 228 | "i=0;i<2;++i){{{}_ssbo{}[({}>>2)+i]=max({}_ssbo{}[({}>>2)+i],unpackUint2x32(uint64_t({}" | ||
| 229 | "))[i]);}}", | ||
| 230 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), ctx.stage_name, | ||
| 231 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 232 | } | ||
| 233 | |||
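// Unlike the emulated add/min/max above, 64-bit and/or/xor/exchange can be
// composed from two genuinely atomic 32-bit operations, one per half.
// Illustrative shape of the result (each word is updated atomically, but
// the pair is not atomic as a whole):
//   r=packUint2x32(uvec2(atomicAnd(ssbo[w],lo),atomicAnd(ssbo[w+1],hi)));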
| 234 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 235 | const IR::Value& offset, std::string_view value) { | ||
| 236 | ctx.AddU64( | ||
| 237 | "{}=packUint2x32(uvec2(atomicAnd({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicAnd({}_" | ||
| 238 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 239 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 240 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 241 | } | ||
| 242 | |||
| 243 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 244 | const IR::Value& offset, std::string_view value) { | ||
| 245 | ctx.AddU64("{}=packUint2x32(uvec2(atomicOr({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicOr({}_" | ||
| 246 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 247 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, | ||
| 248 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 249 | } | ||
| 250 | |||
| 251 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 252 | const IR::Value& offset, std::string_view value) { | ||
| 253 | ctx.AddU64( | ||
| 254 | "{}=packUint2x32(uvec2(atomicXor({}_ssbo{}[{}>>2],unpackUint2x32({}).x),atomicXor({}_" | ||
| 255 | "ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 256 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, ctx.stage_name, | ||
| 257 | binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 258 | } | ||
| 259 | |||
| 260 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 261 | const IR::Value& offset, std::string_view value) { | ||
| 262 | ctx.AddU64("{}=packUint2x32(uvec2(atomicExchange({}_ssbo{}[{}>>2],unpackUint2x32({}).x)," | ||
| 263 | "atomicExchange({}_ssbo{}[({}>>2)+1],unpackUint2x32({}).y)));", | ||
| 264 | inst, ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value, | ||
| 265 | ctx.stage_name, binding.U32(), ctx.var_alloc.Consume(offset), value); | ||
| 266 | } | ||
| 267 | |||
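// The float atomics below defer to CAS helpers (CasFloatAdd and friends)
// emitted in the shader preamble rather than in this file. A minimal
// sketch of the assumed shape of such a helper, with illustrative names;
// the real preamble may differ:
//   uint CasFloatAdd(uint index,float value){
//       uint old_value;
//       do{
//           old_value=cs_ssbo0[index];
//       }while(atomicCompSwap(cs_ssbo0[index],old_value,
//                             ftou(utof(old_value)+value))!=old_value);
//       return old_value;
//   }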
| 268 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 269 | const IR::Value& offset, std::string_view value) { | ||
| 270 | SsboCasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); | ||
| 271 | } | ||
| 272 | |||
| 273 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 274 | const IR::Value& offset, std::string_view value) { | ||
| 275 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 279 | const IR::Value& offset, std::string_view value) { | ||
| 280 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); | ||
| 281 | } | ||
| 282 | |||
| 283 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 284 | const IR::Value& offset, std::string_view value) { | ||
| 285 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin16x2"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 289 | const IR::Value& offset, std::string_view value) { | ||
| 290 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMin32x2"); | ||
| 291 | } | ||
| 292 | |||
| 293 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 294 | const IR::Value& offset, std::string_view value) { | ||
| 295 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax16x2"); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 299 | const IR::Value& offset, std::string_view value) { | ||
| 300 | SsboCasFunction(ctx, inst, binding, offset, value, "CasFloatMax32x2"); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 304 | throw NotImplementedException("GLSL Instruction"); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 308 | throw NotImplementedException("GLSL Instruction"); | ||
| 309 | } | ||
| 310 | |||
| 311 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 312 | throw NotImplementedException("GLSL Instruction"); | ||
| 313 | } | ||
| 314 | |||
| 315 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 316 | throw NotImplementedException("GLSL Instruction"); | ||
| 317 | } | ||
| 318 | |||
| 319 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 320 | throw NotImplementedException("GLSL Instruction"); | ||
| 321 | } | ||
| 322 | |||
| 323 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 324 | throw NotImplementedException("GLSL Instruction"); | ||
| 325 | } | ||
| 326 | |||
| 327 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 328 | throw NotImplementedException("GLSL Instruction"); | ||
| 329 | } | ||
| 330 | |||
| 331 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 332 | throw NotImplementedException("GLSL Instruction"); | ||
| 333 | } | ||
| 334 | |||
| 335 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 336 | throw NotImplementedException("GLSL Instruction"); | ||
| 337 | } | ||
| 338 | |||
| 339 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 340 | throw NotImplementedException("GLSL Instruction"); | ||
| 341 | } | ||
| 342 | |||
| 343 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 344 | throw NotImplementedException("GLSL Instruction"); | ||
| 345 | } | ||
| 346 | |||
| 347 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 348 | throw NotImplementedException("GLSL Instruction"); | ||
| 349 | } | ||
| 350 | |||
| 351 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 352 | throw NotImplementedException("GLSL Instruction"); | ||
| 353 | } | ||
| 354 | |||
| 355 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 356 | throw NotImplementedException("GLSL Instruction"); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 360 | throw NotImplementedException("GLSL Instruction"); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 364 | throw NotImplementedException("GLSL Instruction"); | ||
| 365 | } | ||
| 366 | |||
| 367 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 368 | throw NotImplementedException("GLSL Instruction"); | ||
| 369 | } | ||
| 370 | |||
| 371 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 372 | throw NotImplementedException("GLSL Instruction"); | ||
| 373 | } | ||
| 374 | |||
| 375 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 376 | throw NotImplementedException("GLSL Instruction"); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 380 | throw NotImplementedException("GLSL Instruction"); | ||
| 381 | } | ||
| 382 | |||
| 383 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 384 | throw NotImplementedException("GLSL Instruction"); | ||
| 385 | } | ||
| 386 | |||
| 387 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 388 | throw NotImplementedException("GLSL Instruction"); | ||
| 389 | } | ||
| 390 | |||
| 391 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 392 | throw NotImplementedException("GLSL Instruction"); | ||
| 393 | } | ||
| 394 | |||
| 395 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 396 | throw NotImplementedException("GLSL Instruction"); | ||
| 397 | } | ||
| 398 | |||
| 399 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 400 | throw NotImplementedException("GLSL Instruction"); | ||
| 401 | } | ||
| 402 | |||
| 403 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 404 | throw NotImplementedException("GLSL Instruction"); | ||
| 405 | } | ||
| 406 | |||
| 407 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 408 | throw NotImplementedException("GLSL Instruction"); | ||
| 409 | } | ||
| 410 | |||
| 411 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 412 | throw NotImplementedException("GLSL Instruction"); | ||
| 413 | } | ||
| 414 | |||
| 415 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 416 | throw NotImplementedException("GLSL Instruction"); | ||
| 417 | } | ||
| 418 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp new file mode 100644 index 000000000..e1d1b558e --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_barriers.cpp | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLSL { | ||
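// barrier() synchronizes execution across the workgroup,
// groupMemoryBarrier() orders this invocation's memory accesses as seen by
// the workgroup, and memoryBarrier() orders them device-wide.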
| 10 | void EmitBarrier(EmitContext& ctx) { | ||
| 11 | ctx.Add("barrier();"); | ||
| 12 | } | ||
| 13 | |||
| 14 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 15 | ctx.Add("groupMemoryBarrier();"); | ||
| 16 | } | ||
| 17 | |||
| 18 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 19 | ctx.Add("memoryBarrier();"); | ||
| 20 | } | ||
| 21 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp new file mode 100644 index 000000000..3c1714e89 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
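// Identity values are resolved without emitting a copy: the result
// instruction takes over the producing instruction's definition, and the
// usage count is moved across so variable allocation stays exact.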
| 13 | void Alias(IR::Inst& inst, const IR::Value& value) { | ||
| 14 | if (value.IsImmediate()) { | ||
| 15 | return; | ||
| 16 | } | ||
| 17 | IR::Inst& value_inst{*value.InstRecursive()}; | ||
| 18 | value_inst.DestructiveAddUsage(inst.UseCount()); | ||
| 19 | value_inst.DestructiveRemoveUsage(); | ||
| 20 | inst.SetDefinition(value_inst.Definition<Id>()); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { | ||
| 25 | Alias(inst, value); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { | ||
| 29 | // Fake one usage to get a real variable out of the condition | ||
| 30 | inst.DestructiveAddUsage(1); | ||
| 31 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U1)}; | ||
| 32 | const auto input{ctx.var_alloc.Consume(value)}; | ||
| 33 | if (ret != input) { | ||
| 34 | ctx.Add("{}={};", ret, input); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | ||
| 39 | NotImplemented(); | ||
| 40 | } | ||
| 41 | |||
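// ftou/utof are helpers defined in the emitted preamble (see
// emit_context), assumed here to be thin wrappers over the built-in bit
// casts, e.g.:
//   uint ftou(float f){return floatBitsToUint(f);}
//   float utof(uint u){return uintBitsToFloat(u);}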
| 42 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 43 | ctx.AddU32("{}=ftou({});", inst, value); | ||
| 44 | } | ||
| 45 | |||
| 46 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 47 | ctx.AddU64("{}=doubleBitsToUint64({});", inst, value); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | ||
| 51 | NotImplemented(); | ||
| 52 | } | ||
| 53 | |||
| 54 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 55 | ctx.AddF32("{}=utof({});", inst, value); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 59 | ctx.AddF64("{}=uint64BitsToDouble({});", inst, value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 63 | ctx.AddU64("{}=packUint2x32({});", inst, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 67 | ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); | ||
| 68 | } | ||
| 69 | |||
| 70 | void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 71 | ctx.AddU32("{}=packFloat2x16({});", inst, value); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 75 | ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); | ||
| 76 | } | ||
| 77 | |||
| 78 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 79 | ctx.AddU32("{}=packHalf2x16({});", inst, value); | ||
| 80 | } | ||
| 81 | |||
| 82 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 83 | ctx.AddF32x2("{}=unpackHalf2x16({});", inst, value); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 87 | ctx.AddF64("{}=packDouble2x32({});", inst, value); | ||
| 88 | } | ||
| 89 | |||
| 90 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 91 | ctx.AddU32x2("{}=unpackDouble2x32({});", inst, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp new file mode 100644 index 000000000..49a66e3ec --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_composite.cpp | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | constexpr std::string_view SWIZZLE{"xyzw"}; | ||
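// Writes 'object' into component 'index' of 'composite'. When the variable
// allocator aliased the result to the composite, the write happens in
// place; otherwise the composite is copied first. For a non-aliased vec4
// insert at index 2 this emits, roughly: r=composite;r.z=object;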
| 14 | void CompositeInsert(EmitContext& ctx, std::string_view result, std::string_view composite, | ||
| 15 | std::string_view object, u32 index) { | ||
| 16 | if (result == composite) { | ||
| 17 | // The result is aliased with the composite | ||
| 18 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 19 | } else { | ||
| 20 | ctx.Add("{}={};{}.{}={};", result, composite, result, SWIZZLE[index], object); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 26 | std::string_view e2) { | ||
| 27 | ctx.AddU32x2("{}=uvec2({},{});", inst, e1, e2); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 31 | std::string_view e2, std::string_view e3) { | ||
| 32 | ctx.AddU32x3("{}=uvec3({},{},{});", inst, e1, e2, e3); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 36 | std::string_view e2, std::string_view e3, std::string_view e4) { | ||
| 37 | ctx.AddU32x4("{}=uvec4({},{},{},{});", inst, e1, e2, e3, e4); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 41 | u32 index) { | ||
| 42 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 46 | u32 index) { | ||
| 47 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 51 | u32 index) { | ||
| 52 | ctx.AddU32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 56 | std::string_view object, u32 index) { | ||
| 57 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||
| 58 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 62 | std::string_view object, u32 index) { | ||
| 63 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x3)}; | ||
| 64 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 68 | std::string_view object, u32 index) { | ||
| 69 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x4)}; | ||
| 70 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 74 | [[maybe_unused]] std::string_view e1, | ||
| 75 | [[maybe_unused]] std::string_view e2) { | ||
| 76 | NotImplemented(); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 80 | [[maybe_unused]] std::string_view e1, | ||
| 81 | [[maybe_unused]] std::string_view e2, | ||
| 82 | [[maybe_unused]] std::string_view e3) { | ||
| 83 | NotImplemented(); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 87 | [[maybe_unused]] std::string_view e1, | ||
| 88 | [[maybe_unused]] std::string_view e2, | ||
| 89 | [[maybe_unused]] std::string_view e3, | ||
| 90 | [[maybe_unused]] std::string_view e4) { | ||
| 91 | NotImplemented(); | ||
| 92 | } | ||
| 93 | |||
| 94 | void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 95 | [[maybe_unused]] std::string_view composite, | ||
| 96 | [[maybe_unused]] u32 index) { | ||
| 97 | NotImplemented(); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 101 | [[maybe_unused]] std::string_view composite, | ||
| 102 | [[maybe_unused]] u32 index) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 107 | [[maybe_unused]] std::string_view composite, | ||
| 108 | [[maybe_unused]] u32 index) { | ||
| 109 | NotImplemented(); | ||
| 110 | } | ||
| 111 | |||
| 112 | void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx, | ||
| 113 | [[maybe_unused]] std::string_view composite, | ||
| 114 | [[maybe_unused]] std::string_view object, | ||
| 115 | [[maybe_unused]] u32 index) { | ||
| 116 | NotImplemented(); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx, | ||
| 120 | [[maybe_unused]] std::string_view composite, | ||
| 121 | [[maybe_unused]] std::string_view object, | ||
| 122 | [[maybe_unused]] u32 index) { | ||
| 123 | NotImplemented(); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx, | ||
| 127 | [[maybe_unused]] std::string_view composite, | ||
| 128 | [[maybe_unused]] std::string_view object, | ||
| 129 | [[maybe_unused]] u32 index) { | ||
| 130 | NotImplemented(); | ||
| 131 | } | ||
| 132 | |||
| 133 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 134 | std::string_view e2) { | ||
| 135 | ctx.AddF32x2("{}=vec2({},{});", inst, e1, e2); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 139 | std::string_view e2, std::string_view e3) { | ||
| 140 | ctx.AddF32x3("{}=vec3({},{},{});", inst, e1, e2, e3); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 144 | std::string_view e2, std::string_view e3, std::string_view e4) { | ||
| 145 | ctx.AddF32x4("{}=vec4({},{},{},{});", inst, e1, e2, e3, e4); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 149 | u32 index) { | ||
| 150 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 151 | } | ||
| 152 | |||
| 153 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 154 | u32 index) { | ||
| 155 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 159 | u32 index) { | ||
| 160 | ctx.AddF32("{}={}.{};", inst, composite, SWIZZLE[index]); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 164 | std::string_view object, u32 index) { | ||
| 165 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x2)}; | ||
| 166 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 170 | std::string_view object, u32 index) { | ||
| 171 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x3)}; | ||
| 172 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 176 | std::string_view object, u32 index) { | ||
| 177 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 178 | CompositeInsert(ctx, ret, composite, object, index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 182 | NotImplemented(); | ||
| 183 | } | ||
| 184 | |||
| 185 | void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 186 | NotImplemented(); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 190 | NotImplemented(); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) { | ||
| 194 | NotImplemented(); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) { | ||
| 198 | NotImplemented(); | ||
| 199 | } | ||
| 200 | |||
| 201 | void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) { | ||
| 202 | NotImplemented(); | ||
| 203 | } | ||
| 204 | |||
| 205 | void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 206 | u32 index) { | ||
| 207 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 211 | u32 index) { | ||
| 212 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 213 | } | ||
| 214 | |||
| 215 | void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 216 | u32 index) { | ||
| 217 | ctx.Add("{}.{}={};", composite, SWIZZLE[index], object); | ||
| 218 | } | ||
| 219 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp new file mode 100644 index 000000000..580063fa9 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -0,0 +1,456 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | constexpr char SWIZZLE[]{"xyzw"}; | ||
| 16 | |||
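// Constant buffers are arrays of vec4, so a byte offset selects vector
// offset/16 and component (offset/4)%4. Worked example: offset 0x1C ->
// vector 1, component 3 -> swizzle 'w'.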
| 17 | u32 CbufIndex(u32 offset) { | ||
| 18 | return (offset / 4) % 4; | ||
| 19 | } | ||
| 20 | |||
| 21 | char OffsetSwizzle(u32 offset) { | ||
| 22 | return SWIZZLE[CbufIndex(offset)]; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool IsInputArray(Stage stage) { | ||
| 26 | return stage == Stage::Geometry || stage == Stage::TessellationControl || | ||
| 27 | stage == Stage::TessellationEval; | ||
| 28 | } | ||
| 29 | |||
| 30 | std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) { | ||
| 31 | return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : ""; | ||
| 32 | } | ||
| 33 | |||
| 34 | std::string_view OutputVertexIndex(EmitContext& ctx) { | ||
| 35 | return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; | ||
| 36 | } | ||
| 37 | |||
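// Emits a constant buffer load into 'ret'. For a 32-bit unsigned load with
// immediate offset 0x20 on binding 0 in a vertex shader this produces
// roughly:
//   ret=ftou(vs_cbuf0[2].x);
// With a dynamic offset on drivers flagged with
// has_gl_component_indexing_bug, dynamic component selection is replaced
// by a chain of if((off>>2&3)==i) guarded stores, one per component.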
| 38 | void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | ||
| 39 | const IR::Value& offset, u32 num_bits, std::string_view cast = {}, | ||
| 40 | std::string_view bit_offset = {}) { | ||
| 41 | const bool is_immediate{offset.IsImmediate()}; | ||
| 42 | const bool component_indexing_bug{!is_immediate && ctx.profile.has_gl_component_indexing_bug}; | ||
| 43 | if (is_immediate) { | ||
| 44 | const s32 signed_offset{static_cast<s32>(offset.U32())}; | ||
| 45 | static constexpr u32 cbuf_size{0x10000}; | ||
| 46 | if (signed_offset < 0 || offset.U32() > cbuf_size) { | ||
| 47 | LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); | ||
| 48 | ctx.Add("{}=0u;", ret); | ||
| 49 | return; | ||
| 50 | } | ||
| 51 | } | ||
| 52 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 53 | const auto index{is_immediate ? fmt::format("{}", offset.U32() / 16) | ||
| 54 | : fmt::format("{}>>4", offset_var)}; | ||
| 55 | const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) | ||
| 56 | : fmt::format("[({}>>2)%4]", offset_var)}; | ||
| 57 | |||
| 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; | ||
| 60 | const auto extraction{num_bits == 32 ? cbuf_cast | ||
| 61 | : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, | ||
| 62 | bit_offset, num_bits)}; | ||
| 63 | if (!component_indexing_bug) { | ||
| 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; | ||
| 65 | ctx.Add("{}={};", ret, result); | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 69 | for (u32 i = 0; i < 4; ++i) { | ||
| 70 | const auto swizzle_string{fmt::format(".{}", "xyzw"[i])}; | ||
| 71 | const auto result{fmt::format(fmt::runtime(extraction), swizzle_string)}; | ||
| 72 | ctx.Add("if(({}&3)=={}){}={};", cbuf_offset, i, ret, result); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | void GetCbuf8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, | ||
| 77 | std::string_view cast) { | ||
| 78 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 79 | if (offset.IsImmediate()) { | ||
| 80 | const auto bit_offset{fmt::format("{}", (offset.U32() % 4) * 8)}; | ||
| 81 | GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); | ||
| 82 | } else { | ||
| 83 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 84 | const auto bit_offset{fmt::format("({}%4)*8", offset_var)}; | ||
| 85 | GetCbuf(ctx, ret, binding, offset, 8, cast, bit_offset); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset, | ||
| 90 | std::string_view cast) { | ||
| 91 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 92 | if (offset.IsImmediate()) { | ||
| 93 | const auto bit_offset{fmt::format("{}", ((offset.U32() / 2) % 2) * 16)}; | ||
| 94 | GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); | ||
| 95 | } else { | ||
| 96 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 97 | const auto bit_offset{fmt::format("(({}>>1)%2)*16", offset_var)}; | ||
| 98 | GetCbuf(ctx, ret, binding, offset, 16, cast, bit_offset); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 | u32 TexCoordIndex(IR::Attribute attr) { | ||
| 103 | return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4; | ||
| 104 | } | ||
| 105 | } // Anonymous namespace | ||
| 106 | |||
| 107 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 108 | const IR::Value& offset) { | ||
| 109 | GetCbuf8(ctx, inst, binding, offset, "ftou"); | ||
| 110 | } | ||
| 111 | |||
| 112 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 113 | const IR::Value& offset) { | ||
| 114 | GetCbuf8(ctx, inst, binding, offset, "ftoi"); | ||
| 115 | } | ||
| 116 | |||
| 117 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 118 | const IR::Value& offset) { | ||
| 119 | GetCbuf16(ctx, inst, binding, offset, "ftou"); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 123 | const IR::Value& offset) { | ||
| 124 | GetCbuf16(ctx, inst, binding, offset, "ftoi"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 128 | const IR::Value& offset) { | ||
| 129 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 130 | GetCbuf(ctx, ret, binding, offset, 32, "ftou"); | ||
| 131 | } | ||
| 132 | |||
| 133 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 134 | const IR::Value& offset) { | ||
| 135 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; | ||
| 136 | GetCbuf(ctx, ret, binding, offset, 32); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 140 | const IR::Value& offset) { | ||
| 141 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||
| 142 | if (offset.IsImmediate()) { | ||
| 143 | static constexpr u32 cbuf_size{0x10000}; | ||
| 144 | const u32 u32_offset{offset.U32()}; | ||
| 145 | const s32 signed_offset{static_cast<s32>(offset.U32())}; | ||
| 146 | if (signed_offset < 0 || u32_offset > cbuf_size) { | ||
| 147 | LOG_WARNING(Shader_GLSL, "Immediate constant buffer offset is out of bounds"); | ||
| 148 | ctx.AddU32x2("{}=uvec2(0u);", inst); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | if (u32_offset % 2 == 0) { | ||
| 152 | ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, | ||
| 153 | OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); | ||
| 154 | } else { | ||
| 155 | ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, | ||
| 156 | OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, | ||
| 157 | OffsetSwizzle(u32_offset + 4)); | ||
| 158 | } | ||
| 159 | return; | ||
| 160 | } | ||
| 161 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 162 | if (!ctx.profile.has_gl_component_indexing_bug) { | ||
| 163 | ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", | ||
| 164 | inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); | ||
| 165 | return; | ||
| 166 | } | ||
| 167 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||
| 168 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||
| 169 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 170 | ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, | ||
| 171 | swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, | ||
| 172 | "xyzw"[(swizzle + 1) % 4]); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, | ||
| 177 | std::string_view vertex) { | ||
| 178 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 179 | const char swizzle{"xyzw"[element]}; | ||
| 180 | if (IR::IsGeneric(attr)) { | ||
| 181 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 182 | if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { | ||
| 183 | if (element == 3) { | ||
| 184 | ctx.AddF32("{}=1.f;", inst); | ||
| 185 | } else { | ||
| 186 | ctx.AddF32("{}=0.f;", inst); | ||
| 187 | } | ||
| 188 | return; | ||
| 189 | } | ||
| 190 | ctx.AddF32("{}=in_attr{}{}.{};", inst, index, InputVertexIndex(ctx, vertex), swizzle); | ||
| 191 | return; | ||
| 192 | } | ||
| 193 | // GLSL only exposes 8 legacy texcoords | ||
| 194 | if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 195 | LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", | ||
| 196 | TexCoordIndex(attr)); | ||
| 197 | ctx.AddF32("{}=0.f;", inst); | ||
| 198 | return; | ||
| 199 | } | ||
| 200 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { | ||
| 201 | const u32 index{TexCoordIndex(attr)}; | ||
| 202 | ctx.AddF32("{}=gl_TexCoord[{}].{};", inst, index, swizzle); | ||
| 203 | return; | ||
| 204 | } | ||
| 205 | switch (attr) { | ||
| 206 | case IR::Attribute::PrimitiveId: | ||
| 207 | ctx.AddF32("{}=itof(gl_PrimitiveID);", inst); | ||
| 208 | break; | ||
| 209 | case IR::Attribute::PositionX: | ||
| 210 | case IR::Attribute::PositionY: | ||
| 211 | case IR::Attribute::PositionZ: | ||
| 212 | case IR::Attribute::PositionW: { | ||
| 213 | const bool is_array{IsInputArray(ctx.stage)}; | ||
| 214 | const auto input_decorator{is_array ? fmt::format("gl_in[{}].", vertex) : ""}; | ||
| 215 | ctx.AddF32("{}={}{}.{};", inst, input_decorator, ctx.position_name, swizzle); | ||
| 216 | break; | ||
| 217 | } | ||
| 218 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 219 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 220 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 221 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 222 | if (ctx.stage == Stage::Fragment) { | ||
| 223 | ctx.AddF32("{}=gl_Color.{};", inst, swizzle); | ||
| 224 | } else { | ||
| 225 | ctx.AddF32("{}=gl_FrontColor.{};", inst, swizzle); | ||
| 226 | } | ||
| 227 | break; | ||
| 228 | case IR::Attribute::PointSpriteS: | ||
| 229 | case IR::Attribute::PointSpriteT: | ||
| 230 | ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle); | ||
| 231 | break; | ||
| 232 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 233 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 234 | ctx.AddF32("{}=gl_TessCoord.{};", inst, swizzle); | ||
| 235 | break; | ||
| 236 | case IR::Attribute::InstanceId: | ||
| 237 | ctx.AddF32("{}=itof(gl_InstanceID);", inst); | ||
| 238 | break; | ||
| 239 | case IR::Attribute::VertexId: | ||
| 240 | ctx.AddF32("{}=itof(gl_VertexID);", inst); | ||
| 241 | break; | ||
| 242 | case IR::Attribute::FrontFace: | ||
| 243 | ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst); | ||
| 244 | break; | ||
| 245 | default: | ||
| 246 | throw NotImplementedException("Get attribute {}", attr); | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, | ||
| 251 | [[maybe_unused]] std::string_view vertex) { | ||
| 252 | if (IR::IsGeneric(attr)) { | ||
| 253 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 254 | const u32 attr_element{IR::GenericAttributeElement(attr)}; | ||
| 255 | const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)}; | ||
| 256 | const auto output_decorator{OutputVertexIndex(ctx)}; | ||
| 257 | if (info.num_components == 1) { | ||
| 258 | ctx.Add("{}{}={};", info.name, output_decorator, value); | ||
| 259 | } else { | ||
| 260 | const u32 index_element{attr_element - info.first_element}; | ||
| 261 | ctx.Add("{}{}.{}={};", info.name, output_decorator, "xyzw"[index_element], value); | ||
| 262 | } | ||
| 263 | return; | ||
| 264 | } | ||
| 265 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 266 | const char swizzle{"xyzw"[element]}; | ||
| 267 | // GLSL only exposes 8 legacy texcoords | ||
| 268 | if (attr >= IR::Attribute::FixedFncTexture8S && attr <= IR::Attribute::FixedFncTexture9Q) { | ||
| 269 | LOG_WARNING(Shader_GLSL, "GLSL does not allow access to gl_TexCoord[{}]", | ||
| 270 | TexCoordIndex(attr)); | ||
| 271 | return; | ||
| 272 | } | ||
| 273 | if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture7Q) { | ||
| 274 | const u32 index{TexCoordIndex(attr)}; | ||
| 275 | ctx.Add("gl_TexCoord[{}].{}={};", index, swizzle, value); | ||
| 276 | return; | ||
| 277 | } | ||
| 278 | switch (attr) { | ||
| 279 | case IR::Attribute::Layer: | ||
| 280 | if (ctx.stage != Stage::Geometry && | ||
| 281 | !ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 282 | LOG_WARNING(Shader_GLSL, "Shader stores viewport layer but device does not support " | ||
| 283 | "viewport layer extension"); | ||
| 284 | break; | ||
| 285 | } | ||
| 286 | ctx.Add("gl_Layer=ftoi({});", value); | ||
| 287 | break; | ||
| 288 | case IR::Attribute::ViewportIndex: | ||
| 289 | if (ctx.stage != Stage::Geometry && | ||
| 290 | !ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 291 | LOG_WARNING(Shader_GLSL, "Shader stores viewport index but device does not support " | ||
| 292 | "viewport layer extension"); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | ctx.Add("gl_ViewportIndex=ftoi({});", value); | ||
| 296 | break; | ||
| 297 | case IR::Attribute::ViewportMask: | ||
| 298 | if (ctx.stage != Stage::Geometry && !ctx.profile.support_viewport_mask) { | ||
| 299 | LOG_WARNING( | ||
| 300 | Shader_GLSL, | ||
| 301 | "Shader stores viewport mask but device does not support viewport mask extension"); | ||
| 302 | break; | ||
| 303 | } | ||
| 304 | ctx.Add("gl_ViewportMask[0]=ftoi({});", value); | ||
| 305 | break; | ||
| 306 | case IR::Attribute::PointSize: | ||
| 307 | ctx.Add("gl_PointSize={};", value); | ||
| 308 | break; | ||
| 309 | case IR::Attribute::PositionX: | ||
| 310 | case IR::Attribute::PositionY: | ||
| 311 | case IR::Attribute::PositionZ: | ||
| 312 | case IR::Attribute::PositionW: | ||
| 313 | ctx.Add("gl_Position.{}={};", swizzle, value); | ||
| 314 | break; | ||
| 315 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 316 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 317 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 318 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 319 | ctx.Add("gl_FrontColor.{}={};", swizzle, value); | ||
| 320 | break; | ||
| 321 | case IR::Attribute::ColorFrontSpecularR: | ||
| 322 | case IR::Attribute::ColorFrontSpecularG: | ||
| 323 | case IR::Attribute::ColorFrontSpecularB: | ||
| 324 | case IR::Attribute::ColorFrontSpecularA: | ||
| 325 | ctx.Add("gl_FrontSecondaryColor.{}={};", swizzle, value); | ||
| 326 | break; | ||
| 327 | case IR::Attribute::ColorBackDiffuseR: | ||
| 328 | case IR::Attribute::ColorBackDiffuseG: | ||
| 329 | case IR::Attribute::ColorBackDiffuseB: | ||
| 330 | case IR::Attribute::ColorBackDiffuseA: | ||
| 331 | ctx.Add("gl_BackColor.{}={};", swizzle, value); | ||
| 332 | break; | ||
| 333 | case IR::Attribute::ColorBackSpecularR: | ||
| 334 | case IR::Attribute::ColorBackSpecularG: | ||
| 335 | case IR::Attribute::ColorBackSpecularB: | ||
| 336 | case IR::Attribute::ColorBackSpecularA: | ||
| 337 | ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); | ||
| 338 | break; | ||
| 339 | case IR::Attribute::FogCoordinate: | ||
| 340 | ctx.Add("gl_FogFragCoord={};", value); | ||
| 341 | break; | ||
| 342 | case IR::Attribute::ClipDistance0: | ||
| 343 | case IR::Attribute::ClipDistance1: | ||
| 344 | case IR::Attribute::ClipDistance2: | ||
| 345 | case IR::Attribute::ClipDistance3: | ||
| 346 | case IR::Attribute::ClipDistance4: | ||
| 347 | case IR::Attribute::ClipDistance5: | ||
| 348 | case IR::Attribute::ClipDistance6: | ||
| 349 | case IR::Attribute::ClipDistance7: { | ||
| 350 | const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 351 | ctx.Add("gl_ClipDistance[{}]={};", index, value); | ||
| 352 | break; | ||
| 353 | } | ||
| 354 | default: | ||
| 355 | throw NotImplementedException("Set attribute {}", attr); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 360 | std::string_view vertex) { | ||
| 361 | const bool is_array{ctx.stage == Stage::Geometry}; | ||
| 362 | const auto vertex_arg{is_array ? fmt::format(",{}", vertex) : ""}; | ||
| 363 | ctx.AddF32("{}=IndexedAttrLoad(int({}){});", inst, offset, vertex_arg); | ||
| 364 | } | ||
| 365 | |||
| 366 | void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, | ||
| 367 | [[maybe_unused]] std::string_view offset, | ||
| 368 | [[maybe_unused]] std::string_view value, | ||
| 369 | [[maybe_unused]] std::string_view vertex) { | ||
| 370 | NotImplemented(); | ||
| 371 | } | ||
| 372 | |||
| 373 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { | ||
| 374 | if (!IR::IsGeneric(patch)) { | ||
| 375 | throw NotImplementedException("Non-generic patch load"); | ||
| 376 | } | ||
| 377 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 378 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 379 | const char swizzle{"xyzw"[element]}; | ||
| 380 | ctx.AddF32("{}=patch{}.{};", inst, index, swizzle); | ||
| 381 | } | ||
| 382 | |||
| 383 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value) { | ||
| 384 | if (IR::IsGeneric(patch)) { | ||
| 385 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 386 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 387 | ctx.Add("patch{}.{}={};", index, "xyzw"[element], value); | ||
| 388 | return; | ||
| 389 | } | ||
| 390 | switch (patch) { | ||
| 391 | case IR::Patch::TessellationLodLeft: | ||
| 392 | case IR::Patch::TessellationLodRight: | ||
| 393 | case IR::Patch::TessellationLodTop: | ||
| 394 | case IR::Patch::TessellationLodBottom: { | ||
| 395 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 396 | ctx.Add("gl_TessLevelOuter[{}]={};", index, value); | ||
| 397 | break; | ||
| 398 | } | ||
| 399 | case IR::Patch::TessellationLodInteriorU: | ||
| 400 | ctx.Add("gl_TessLevelInner[0]={};", value); | ||
| 401 | break; | ||
| 402 | case IR::Patch::TessellationLodInteriorV: | ||
| 403 | ctx.Add("gl_TessLevelInner[1]={};", value); | ||
| 404 | break; | ||
| 405 | default: | ||
| 406 | throw NotImplementedException("Patch {}", patch); | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 410 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value) { | ||
| 411 | const char swizzle{"xyzw"[component]}; | ||
| 412 | ctx.Add("frag_color{}.{}={};", index, swizzle, value); | ||
| 413 | } | ||
| 414 | |||
| 415 | void EmitSetSampleMask(EmitContext& ctx, std::string_view value) { | ||
| 416 | ctx.Add("gl_SampleMask[0]=int({});", value); | ||
| 417 | } | ||
| 418 | |||
| 419 | void EmitSetFragDepth(EmitContext& ctx, std::string_view value) { | ||
| 420 | ctx.Add("gl_FragDepth={};", value); | ||
| 421 | } | ||
| 422 | |||
| 423 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 424 | ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); | ||
| 425 | } | ||
| 426 | |||
| 427 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { | ||
| 428 | ctx.AddU32x3("{}=gl_WorkGroupID;", inst); | ||
| 429 | } | ||
| 430 | |||
| 431 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 432 | ctx.AddU32("{}=uint(gl_InvocationID);", inst); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { | ||
| 436 | ctx.AddU32("{}=uint(gl_SampleID);", inst); | ||
| 437 | } | ||
| 438 | |||
| 439 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { | ||
| 440 | ctx.AddU1("{}=gl_HelperInvocation;", inst); | ||
| 441 | } | ||
| 442 | |||
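// Setting uses_y_direction makes the host supply the Y-flip sign; it is
// assumed to be stashed in the compatibility-profile material state so
// reading gl_FrontMaterial.ambient.a avoids a dedicated uniform.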
| 443 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | ||
| 444 | ctx.uses_y_direction = true; | ||
| 445 | ctx.AddF32("{}=gl_FrontMaterial.ambient.a;", inst); | ||
| 446 | } | ||
| 447 | |||
| 448 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset) { | ||
| 449 | ctx.AddU32("{}=lmem[{}];", inst, word_offset); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value) { | ||
| 453 | ctx.Add("lmem[{}]={};", word_offset, value); | ||
| 454 | } | ||
| 455 | |||
| 456 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp new file mode 100644 index 000000000..53f8896be --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_control_flow.cpp | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/exception.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | |||
| 13 | void EmitJoin(EmitContext&) { | ||
| 14 | throw NotImplementedException("Join shouldn't be emitted"); | ||
| 15 | } | ||
| 16 | |||
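// GLSL has no counterpart to SPIR-V's OpDemoteToHelperInvocation, so
// discard is the closest available approximation (it also terminates the
// invocation rather than merely demoting it).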
| 17 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 18 | ctx.Add("discard;"); | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp new file mode 100644 index 000000000..eeae6562c --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_convert.cpp | |||
| @@ -0,0 +1,230 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | void EmitConvertS16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 13 | [[maybe_unused]] std::string_view value) { | ||
| 14 | NotImplemented(); | ||
| 15 | } | ||
| 16 | |||
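// Float -> signed 16-bit held in 32 bits: the truncated integer is masked
// to 16 bits and the sign bit, extracted with the sign-extending one-bit
// bitfieldExtract, is propagated from bit 15 upward. Worked example:
// value=-2.0 -> int=-2, &0xffff=0xFFFE, sign term 0xFFFF8000, result
// 0xFFFFFFFE (-2).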
| 17 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 18 | ctx.AddU32("{}=(int({})&0xffff)|(bitfieldExtract(int({}),31,1)<<15);", inst, value, value); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitConvertS16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 22 | [[maybe_unused]] std::string_view value) { | ||
| 23 | NotImplemented(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void EmitConvertS32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 27 | [[maybe_unused]] std::string_view value) { | ||
| 28 | NotImplemented(); | ||
| 29 | } | ||
| 30 | |||
| 31 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 32 | ctx.AddU32("{}=int({});", inst, value); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 36 | ctx.AddU32("{}=int({});", inst, value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitConvertS64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 40 | [[maybe_unused]] std::string_view value) { | ||
| 41 | NotImplemented(); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 45 | ctx.AddU64("{}=int64_t({});", inst, value); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 49 | ctx.AddU64("{}=int64_t({});", inst, value); | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitConvertU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 53 | [[maybe_unused]] std::string_view value) { | ||
| 54 | NotImplemented(); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitConvertU16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 58 | [[maybe_unused]] std::string_view value) { | ||
| 59 | NotImplemented(); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitConvertU16F64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 63 | [[maybe_unused]] std::string_view value) { | ||
| 64 | NotImplemented(); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitConvertU32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 68 | [[maybe_unused]] std::string_view value) { | ||
| 69 | NotImplemented(); | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 73 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 77 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 78 | } | ||
| 79 | |||
| 80 | void EmitConvertU64F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 81 | [[maybe_unused]] std::string_view value) { | ||
| 82 | NotImplemented(); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 86 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 90 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 94 | ctx.AddU64("{}=uint64_t({});", inst, value); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 98 | ctx.AddU32("{}=uint({});", inst, value); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitConvertF16F32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 102 | [[maybe_unused]] std::string_view value) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitConvertF32F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 107 | [[maybe_unused]] std::string_view value) { | ||
| 108 | NotImplemented(); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 112 | ctx.AddF32("{}=float({});", inst, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 116 | ctx.AddF64("{}=double({});", inst, value); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitConvertF16S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 120 | [[maybe_unused]] std::string_view value) { | ||
| 121 | NotImplemented(); | ||
| 122 | } | ||
| 123 | |||
| 124 | void EmitConvertF16S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 125 | [[maybe_unused]] std::string_view value) { | ||
| 126 | NotImplemented(); | ||
| 127 | } | ||
| 128 | |||
| 129 | void EmitConvertF16S32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 130 | [[maybe_unused]] std::string_view value) { | ||
| 131 | NotImplemented(); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitConvertF16S64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 135 | [[maybe_unused]] std::string_view value) { | ||
| 136 | NotImplemented(); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitConvertF16U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 140 | [[maybe_unused]] std::string_view value) { | ||
| 141 | NotImplemented(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitConvertF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 145 | [[maybe_unused]] std::string_view value) { | ||
| 146 | NotImplemented(); | ||
| 147 | } | ||
| 148 | |||
| 149 | void EmitConvertF16U32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 150 | [[maybe_unused]] std::string_view value) { | ||
| 151 | NotImplemented(); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitConvertF16U64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 155 | [[maybe_unused]] std::string_view value) { | ||
| 156 | NotImplemented(); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitConvertF32S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 160 | [[maybe_unused]] std::string_view value) { | ||
| 161 | NotImplemented(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitConvertF32S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 165 | [[maybe_unused]] std::string_view value) { | ||
| 166 | NotImplemented(); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 170 | ctx.AddF32("{}=float(int({}));", inst, value); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 174 | ctx.AddF32("{}=float(int64_t({}));", inst, value); | ||
| 175 | } | ||
| 176 | |||
| 177 | void EmitConvertF32U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 178 | [[maybe_unused]] std::string_view value) { | ||
| 179 | NotImplemented(); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 183 | ctx.AddF32("{}=float({}&0xffff);", inst, value); | ||
| 184 | } | ||
| 185 | |||
| 186 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 187 | ctx.AddF32("{}=float({});", inst, value); | ||
| 188 | } | ||
| 189 | |||
| 190 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 191 | ctx.AddF32("{}=float({});", inst, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitConvertF64S8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 195 | [[maybe_unused]] std::string_view value) { | ||
| 196 | NotImplemented(); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitConvertF64S16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 200 | [[maybe_unused]] std::string_view value) { | ||
| 201 | NotImplemented(); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 205 | ctx.AddF64("{}=double(int({}));", inst, value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 209 | ctx.AddF64("{}=double(int64_t({}));", inst, value); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitConvertF64U8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 213 | [[maybe_unused]] std::string_view value) { | ||
| 214 | NotImplemented(); | ||
| 215 | } | ||
| 216 | |||
| 217 | void EmitConvertF64U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 218 | [[maybe_unused]] std::string_view value) { | ||
| 219 | NotImplemented(); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 223 | ctx.AddF64("{}=double({});", inst, value); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 227 | ctx.AddF64("{}=double({});", inst, value); | ||
| 228 | } | ||
| 229 | |||
| 230 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp new file mode 100644 index 000000000..d423bfb1b --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_floating_point.cpp | |||
| @@ -0,0 +1,456 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | void Compare(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs, | ||
| 15 | std::string_view op, bool ordered) { | ||
| 16 | // Ordered comparisons are false when either operand is NaN; | ||
| 17 | // unordered comparisons are true in that case. | ||
| 18 | const auto nan_op{ordered ? "&&!" : "||"}; | ||
| 19 | ctx.AddU1("{}={}{}{}{}isnan({}){}isnan({});", inst, lhs, op, rhs, nan_op, lhs, nan_op, rhs); | ||
| 20 | } | ||
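// A minimal sketch of the GLSL this emits, with hypothetical operand names f0 and f1:
//
//     ordered   (e.g. FPOrdEqual32):   b0 = f0 == f1 && !isnan(f0) && !isnan(f1);
//     unordered (e.g. FPUnordEqual32): b0 = f0 == f1 || isnan(f0) || isnan(f1);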
| 21 | |||
| 22 | bool IsPrecise(const IR::Inst& inst) { | ||
| 23 | return inst.Flags<IR::FpControl>().no_contraction; | ||
| 24 | } | ||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 28 | [[maybe_unused]] std::string_view value) { | ||
| 29 | NotImplemented(); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 33 | ctx.AddF32("{}=abs({});", inst, value); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 37 | ctx.AddF64("{}=abs({});", inst, value); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 41 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 46 | if (IsPrecise(inst)) { | ||
| 47 | ctx.AddPrecF32("{}={}+{};", inst, a, b); | ||
| 48 | } else { | ||
| 49 | ctx.AddF32("{}={}+{};", inst, a, b); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 54 | if (IsPrecise(inst)) { | ||
| 55 | ctx.AddPrecF64("{}={}+{};", inst, a, b); | ||
| 56 | } else { | ||
| 57 | ctx.AddF64("{}={}+{};", inst, a, b); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 62 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b, | ||
| 63 | [[maybe_unused]] std::string_view c) { | ||
| 64 | NotImplemented(); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 68 | std::string_view c) { | ||
| 69 | if (IsPrecise(inst)) { | ||
| 70 | ctx.AddPrecF32("{}=fma({},{},{});", inst, a, b, c); | ||
| 71 | } else { | ||
| 72 | ctx.AddF32("{}=fma({},{},{});", inst, a, b, c); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 77 | std::string_view c) { | ||
| 78 | if (IsPrecise(inst)) { | ||
| 79 | ctx.AddPrecF64("{}=fma({},{},{});", inst, a, b, c); | ||
| 80 | } else { | ||
| 81 | ctx.AddF64("{}=fma({},{},{});", inst, a, b, c); | ||
| 82 | } | ||
| 83 | } | ||
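// Sketch of the precise path, assuming AddPrecF32/AddPrecF64 declare the destination with
// GLSL's precise qualifier so the compiler may not contract or reassociate the expression:
//
//     precise float pf32_0 = fma(f32_1, f32_2, f32_3);
//
// The variable names are hypothetical; only the qualifier placement is the point.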
| 84 | |||
| 85 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 86 | ctx.AddF32("{}=max({},{});", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 90 | ctx.AddF64("{}=max({},{});", inst, a, b); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 94 | ctx.AddF32("{}=min({},{});", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 98 | ctx.AddF64("{}=min({},{});", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 102 | [[maybe_unused]] std::string_view a, [[maybe_unused]] std::string_view b) { | ||
| 103 | NotImplemented(); | ||
| 104 | } | ||
| 105 | |||
| 106 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 107 | if (IsPrecise(inst)) { | ||
| 108 | ctx.AddPrecF32("{}={}*{};", inst, a, b); | ||
| 109 | } else { | ||
| 110 | ctx.AddF32("{}={}*{};", inst, a, b); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 115 | if (IsPrecise(inst)) { | ||
| 116 | ctx.AddPrecF64("{}={}*{};", inst, a, b); | ||
| 117 | } else { | ||
| 118 | ctx.AddF64("{}={}*{};", inst, a, b); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 123 | [[maybe_unused]] std::string_view value) { | ||
| 124 | NotImplemented(); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 128 | ctx.AddF32("{}=-({});", inst, value); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 132 | ctx.AddF64("{}=-({});", inst, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 136 | ctx.AddF32("{}=sin({});", inst, value); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 140 | ctx.AddF32("{}=cos({});", inst, value); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 144 | ctx.AddF32("{}=exp2({});", inst, value); | ||
| 145 | } | ||
| 146 | |||
| 147 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 148 | ctx.AddF32("{}=log2({});", inst, value); | ||
| 149 | } | ||
| 150 | |||
| 151 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 152 | ctx.AddF32("{}=(1.0f)/{};", inst, value); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 156 | ctx.AddF64("{}=1.0/{};", inst, value); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, | ||
| 160 | std::string_view value) { | ||
| 161 | ctx.AddF32("{}=inversesqrt({});", inst, value); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 165 | [[maybe_unused]] std::string_view value) { | ||
| 166 | NotImplemented(); | ||
| 167 | } | ||
| 168 | |||
| 169 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 170 | ctx.AddF32("{}=sqrt({});", inst, value); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 174 | [[maybe_unused]] std::string_view value) { | ||
| 175 | NotImplemented(); | ||
| 176 | } | ||
| 177 | |||
| 178 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 179 | ctx.AddF32("{}=min(max({},0.0),1.0);", inst, value); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 183 | ctx.AddF64("{}=min(max({},0.0),1.0);", inst, value); | ||
| 184 | } | ||
| 185 | |||
| 186 | void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 187 | [[maybe_unused]] std::string_view value, | ||
| 188 | [[maybe_unused]] std::string_view min_value, | ||
| 189 | [[maybe_unused]] std::string_view max_value) { | ||
| 190 | NotImplemented(); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 194 | std::string_view min_value, std::string_view max_value) { | ||
| 195 | // GLSL's clamp() is undefined when min_value > max_value; min(max()) keeps a defined result | ||
| 196 | ctx.AddF32("{}=min(max({},float({})),float({}));", inst, value, min_value, max_value); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 200 | std::string_view min_value, std::string_view max_value) { | ||
| 201 | // GLSL's clamp() is undefined when min_value > max_value; min(max()) keeps a defined result | ||
| 202 | ctx.AddF64("{}=min(max({},double({})),double({}));", inst, value, min_value, max_value); | ||
| 203 | } | ||
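// Example of the emitted replacement for clamp, with hypothetical names:
//
//     f32_0 = min(max(f32_1, float(f32_2)), float(f32_3));
//
// Unlike clamp(), this stays well defined when min_value > max_value: the upper bound wins.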
| 204 | |||
| 205 | void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 206 | [[maybe_unused]] std::string_view value) { | ||
| 207 | NotImplemented(); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 211 | ctx.AddF32("{}=roundEven({});", inst, value); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 215 | ctx.AddF64("{}=roundEven({});", inst, value); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 219 | [[maybe_unused]] std::string_view value) { | ||
| 220 | NotImplemented(); | ||
| 221 | } | ||
| 222 | |||
| 223 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 224 | ctx.AddF32("{}=floor({});", inst, value); | ||
| 225 | } | ||
| 226 | |||
| 227 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 228 | ctx.AddF64("{}=floor({});", inst, value); | ||
| 229 | } | ||
| 230 | |||
| 231 | void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 232 | [[maybe_unused]] std::string_view value) { | ||
| 233 | NotImplemented(); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 237 | ctx.AddF32("{}=ceil({});", inst, value); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 241 | ctx.AddF64("{}=ceil({});", inst, value); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 245 | [[maybe_unused]] std::string_view value) { | ||
| 246 | NotImplemented(); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 250 | ctx.AddF32("{}=trunc({});", inst, value); | ||
| 251 | } | ||
| 252 | |||
| 253 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 254 | ctx.AddF64("{}=trunc({});", inst, value); | ||
| 255 | } | ||
| 256 | |||
| 257 | void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 258 | [[maybe_unused]] std::string_view rhs) { | ||
| 259 | NotImplemented(); | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 263 | std::string_view rhs) { | ||
| 264 | Compare(ctx, inst, lhs, rhs, "==", true); | ||
| 265 | } | ||
| 266 | |||
| 267 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 268 | std::string_view rhs) { | ||
| 269 | Compare(ctx, inst, lhs, rhs, "==", true); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 273 | [[maybe_unused]] std::string_view rhs) { | ||
| 274 | NotImplemented(); | ||
| 275 | } | ||
| 276 | |||
| 277 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 278 | std::string_view rhs) { | ||
| 279 | Compare(ctx, inst, lhs, rhs, "==", false); | ||
| 280 | } | ||
| 281 | |||
| 282 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 283 | std::string_view rhs) { | ||
| 284 | Compare(ctx, inst, lhs, rhs, "==", false); | ||
| 285 | } | ||
| 286 | |||
| 287 | void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 288 | [[maybe_unused]] std::string_view rhs) { | ||
| 289 | NotImplemented(); | ||
| 290 | } | ||
| 291 | |||
| 292 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 293 | std::string_view rhs) { | ||
| 294 | Compare(ctx, inst, lhs, rhs, "!=", true); | ||
| 295 | } | ||
| 296 | |||
| 297 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 298 | std::string_view rhs) { | ||
| 299 | Compare(ctx, inst, lhs, rhs, "!=", true); | ||
| 300 | } | ||
| 301 | |||
| 302 | void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 303 | [[maybe_unused]] std::string_view rhs) { | ||
| 304 | NotImplemented(); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 308 | std::string_view rhs) { | ||
| 309 | Compare(ctx, inst, lhs, rhs, "!=", false); | ||
| 310 | } | ||
| 311 | |||
| 312 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 313 | std::string_view rhs) { | ||
| 314 | Compare(ctx, inst, lhs, rhs, "!=", false); | ||
| 315 | } | ||
| 316 | |||
| 317 | void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 318 | [[maybe_unused]] std::string_view rhs) { | ||
| 319 | NotImplemented(); | ||
| 320 | } | ||
| 321 | |||
| 322 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 323 | std::string_view rhs) { | ||
| 324 | Compare(ctx, inst, lhs, rhs, "<", true); | ||
| 325 | } | ||
| 326 | |||
| 327 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 328 | std::string_view rhs) { | ||
| 329 | Compare(ctx, inst, lhs, rhs, "<", true); | ||
| 330 | } | ||
| 331 | |||
| 332 | void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view lhs, | ||
| 333 | [[maybe_unused]] std::string_view rhs) { | ||
| 334 | NotImplemented(); | ||
| 335 | } | ||
| 336 | |||
| 337 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 338 | std::string_view rhs) { | ||
| 339 | Compare(ctx, inst, lhs, rhs, "<", false); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 343 | std::string_view rhs) { | ||
| 344 | Compare(ctx, inst, lhs, rhs, "<", false); | ||
| 345 | } | ||
| 346 | |||
| 347 | void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, | ||
| 348 | [[maybe_unused]] std::string_view lhs, | ||
| 349 | [[maybe_unused]] std::string_view rhs) { | ||
| 350 | NotImplemented(); | ||
| 351 | } | ||
| 352 | |||
| 353 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 354 | std::string_view rhs) { | ||
| 355 | Compare(ctx, inst, lhs, rhs, ">", true); | ||
| 356 | } | ||
| 357 | |||
| 358 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 359 | std::string_view rhs) { | ||
| 360 | Compare(ctx, inst, lhs, rhs, ">", true); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, | ||
| 364 | [[maybe_unused]] std::string_view lhs, | ||
| 365 | [[maybe_unused]] std::string_view rhs) { | ||
| 366 | NotImplemented(); | ||
| 367 | } | ||
| 368 | |||
| 369 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 370 | std::string_view rhs) { | ||
| 371 | Compare(ctx, inst, lhs, rhs, ">", false); | ||
| 372 | } | ||
| 373 | |||
| 374 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 375 | std::string_view rhs) { | ||
| 376 | Compare(ctx, inst, lhs, rhs, ">", false); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 380 | [[maybe_unused]] std::string_view lhs, | ||
| 381 | [[maybe_unused]] std::string_view rhs) { | ||
| 382 | NotImplemented(); | ||
| 383 | } | ||
| 384 | |||
| 385 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 386 | std::string_view rhs) { | ||
| 387 | Compare(ctx, inst, lhs, rhs, "<=", true); | ||
| 388 | } | ||
| 389 | |||
| 390 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 391 | std::string_view rhs) { | ||
| 392 | Compare(ctx, inst, lhs, rhs, "<=", true); | ||
| 393 | } | ||
| 394 | |||
| 395 | void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 396 | [[maybe_unused]] std::string_view lhs, | ||
| 397 | [[maybe_unused]] std::string_view rhs) { | ||
| 398 | NotImplemented(); | ||
| 399 | } | ||
| 400 | |||
| 401 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 402 | std::string_view rhs) { | ||
| 403 | Compare(ctx, inst, lhs, rhs, "<=", false); | ||
| 404 | } | ||
| 405 | |||
| 406 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 407 | std::string_view rhs) { | ||
| 408 | Compare(ctx, inst, lhs, rhs, "<=", false); | ||
| 409 | } | ||
| 410 | |||
| 411 | void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 412 | [[maybe_unused]] std::string_view lhs, | ||
| 413 | [[maybe_unused]] std::string_view rhs) { | ||
| 414 | NotImplemented(); | ||
| 415 | } | ||
| 416 | |||
| 417 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 418 | std::string_view rhs) { | ||
| 419 | Compare(ctx, inst, lhs, rhs, ">=", true); | ||
| 420 | } | ||
| 421 | |||
| 422 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 423 | std::string_view rhs) { | ||
| 424 | Compare(ctx, inst, lhs, rhs, ">=", true); | ||
| 425 | } | ||
| 426 | |||
| 427 | void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, | ||
| 428 | [[maybe_unused]] std::string_view lhs, | ||
| 429 | [[maybe_unused]] std::string_view rhs) { | ||
| 430 | NotImplemented(); | ||
| 431 | } | ||
| 432 | |||
| 433 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 434 | std::string_view rhs) { | ||
| 435 | Compare(ctx, inst, lhs, rhs, ">=", false); | ||
| 436 | } | ||
| 437 | |||
| 438 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 439 | std::string_view rhs) { | ||
| 440 | Compare(ctx, inst, lhs, rhs, ">=", false); | ||
| 441 | } | ||
| 442 | |||
| 443 | void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 444 | [[maybe_unused]] std::string_view value) { | ||
| 445 | NotImplemented(); | ||
| 446 | } | ||
| 447 | |||
| 448 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 449 | ctx.AddU1("{}=isnan({});", inst, value); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 453 | ctx.AddU1("{}=isnan({});", inst, value); | ||
| 454 | } | ||
| 455 | |||
| 456 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp new file mode 100644 index 000000000..447eb8e0a --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -0,0 +1,799 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | std::string Texture(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { | ||
| 16 | const auto def{info.type == TextureType::Buffer ? ctx.texture_buffers.at(info.descriptor_index) | ||
| 17 | : ctx.textures.at(info.descriptor_index)}; | ||
| 18 | const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; | ||
| 19 | return fmt::format("tex{}{}", def.binding, index_offset); | ||
| 20 | } | ||
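// Example: a texture descriptor at binding 2 yields "tex2", or "tex2[idx]" when the
// descriptor is an array (count > 1) and idx is the consumed index variable. The "tex"
// prefix matches the names the EmitContext presumably declares per binding.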
| 21 | |||
| 22 | std::string Image(EmitContext& ctx, const IR::TextureInstInfo& info, const IR::Value& index) { | ||
| 23 | const auto def{info.type == TextureType::Buffer ? ctx.image_buffers.at(info.descriptor_index) | ||
| 24 | : ctx.images.at(info.descriptor_index)}; | ||
| 25 | const auto index_offset{def.count > 1 ? fmt::format("[{}]", ctx.var_alloc.Consume(index)) : ""}; | ||
| 26 | return fmt::format("img{}{}", def.binding, index_offset); | ||
| 27 | } | ||
| 28 | |||
| 29 | std::string CastToIntVec(std::string_view value, const IR::TextureInstInfo& info) { | ||
| 30 | switch (info.type) { | ||
| 31 | case TextureType::Color1D: | ||
| 32 | case TextureType::Buffer: | ||
| 33 | return fmt::format("int({})", value); | ||
| 34 | case TextureType::ColorArray1D: | ||
| 35 | case TextureType::Color2D: | ||
| 36 | case TextureType::ColorArray2D: | ||
| 37 | return fmt::format("ivec2({})", value); | ||
| 38 | case TextureType::Color3D: | ||
| 39 | case TextureType::ColorCube: | ||
| 40 | return fmt::format("ivec3({})", value); | ||
| 41 | case TextureType::ColorArrayCube: | ||
| 42 | return fmt::format("ivec4({})", value); | ||
| 43 | default: | ||
| 44 | throw NotImplementedException("Integer cast for TextureType {}", info.type.Value()); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | std::string CoordsCastToInt(std::string_view value, const IR::TextureInstInfo& info) { | ||
| 49 | switch (info.type) { | ||
| 50 | case TextureType::Color1D: | ||
| 51 | case TextureType::Buffer: | ||
| 52 | return fmt::format("int({})", value); | ||
| 53 | case TextureType::ColorArray1D: | ||
| 54 | case TextureType::Color2D: | ||
| 55 | return fmt::format("ivec2({})", value); | ||
| 56 | case TextureType::ColorArray2D: | ||
| 57 | case TextureType::Color3D: | ||
| 58 | case TextureType::ColorCube: | ||
| 59 | return fmt::format("ivec3({})", value); | ||
| 60 | case TextureType::ColorArrayCube: | ||
| 61 | return fmt::format("ivec4({})", value); | ||
| 62 | default: | ||
| 63 | throw NotImplementedException("TexelFetchCast type {}", info.type.Value()); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | bool NeedsShadowLodExt(TextureType type) { | ||
| 68 | switch (type) { | ||
| 69 | case TextureType::ColorArray2D: | ||
| 70 | case TextureType::ColorCube: | ||
| 71 | case TextureType::ColorArrayCube: | ||
| 72 | return true; | ||
| 73 | default: | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string GetOffsetVec(EmitContext& ctx, const IR::Value& offset) { | ||
| 79 | if (offset.IsImmediate()) { | ||
| 80 | return fmt::format("int({})", offset.U32()); | ||
| 81 | } | ||
| 82 | IR::Inst* const inst{offset.InstRecursive()}; | ||
| 83 | if (inst->AreAllArgsImmediates()) { | ||
| 84 | switch (inst->GetOpcode()) { | ||
| 85 | case IR::Opcode::CompositeConstructU32x2: | ||
| 86 | return fmt::format("ivec2({},{})", inst->Arg(0).U32(), inst->Arg(1).U32()); | ||
| 87 | case IR::Opcode::CompositeConstructU32x3: | ||
| 88 | return fmt::format("ivec3({},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), | ||
| 89 | inst->Arg(2).U32()); | ||
| 90 | case IR::Opcode::CompositeConstructU32x4: | ||
| 91 | return fmt::format("ivec4({},{},{},{})", inst->Arg(0).U32(), inst->Arg(1).U32(), | ||
| 92 | inst->Arg(2).U32(), inst->Arg(3).U32()); | ||
| 93 | default: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | } | ||
| 97 | const bool has_var_aoffi{ctx.profile.support_gl_variable_aoffi}; | ||
| 98 | if (!has_var_aoffi) { | ||
| 99 | LOG_WARNING(Shader_GLSL, "Device does not support variable texture offsets, STUBBING"); | ||
| 100 | } | ||
| 101 | const auto offset_str{has_var_aoffi ? ctx.var_alloc.Consume(offset) : "0"}; | ||
| 102 | switch (offset.Type()) { | ||
| 103 | case IR::Type::U32: | ||
| 104 | return fmt::format("int({})", offset_str); | ||
| 105 | case IR::Type::U32x2: | ||
| 106 | return fmt::format("ivec2({})", offset_str); | ||
| 107 | case IR::Type::U32x3: | ||
| 108 | return fmt::format("ivec3({})", offset_str); | ||
| 109 | case IR::Type::U32x4: | ||
| 110 | return fmt::format("ivec4({})", offset_str); | ||
| 111 | default: | ||
| 112 | throw NotImplementedException("Offset type {}", offset.Type()); | ||
| 113 | } | ||
| 114 | } | ||
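// Sketch of the two paths with hypothetical values: an immediate
// CompositeConstructU32x2(1, 2) folds to the literal "ivec2(1,2)", while a dynamic U32x2
// offset becomes "ivec2(off_var)" only when the profile reports
// support_gl_variable_aoffi; otherwise it degrades to the constant "ivec2(0)" with a warning.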
| 115 | |||
| 116 | std::string PtpOffsets(const IR::Value& offset, const IR::Value& offset2) { | ||
| 117 | const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; | ||
| 118 | if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { | ||
| 119 | LOG_WARNING(Shader_GLSL, "Not all arguments in PTP are immediate, STUBBING"); | ||
| 120 | return "ivec2[](ivec2(0), ivec2(1), ivec2(2), ivec2(3))"; | ||
| 121 | } | ||
| 122 | const IR::Opcode opcode{values[0]->GetOpcode()}; | ||
| 123 | if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { | ||
| 124 | throw LogicError("Invalid PTP arguments"); | ||
| 125 | } | ||
| 126 | auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; | ||
| 127 | |||
| 128 | return fmt::format("ivec2[](ivec2({},{}),ivec2({},{}),ivec2({},{}),ivec2({},{}))", read(0, 0), | ||
| 129 | read(0, 1), read(0, 2), read(0, 3), read(1, 0), read(1, 1), read(1, 2), | ||
| 130 | read(1, 3)); | ||
| 131 | } | ||
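// Example: operands CompositeConstructU32x4(0,1,2,3) and (4,5,6,7) pack into
//
//     ivec2[](ivec2(0,1),ivec2(2,3),ivec2(4,5),ivec2(6,7))
//
// which textureGatherOffsets consumes as its four per-texel offsets.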
| 132 | |||
| 133 | IR::Inst* PrepareSparse(IR::Inst& inst) { | ||
| 134 | const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 135 | if (sparse_inst) { | ||
| 136 | sparse_inst->Invalidate(); | ||
| 137 | } | ||
| 138 | return sparse_inst; | ||
| 139 | } | ||
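// Emitters call PrepareSparse before writing output: when the instruction carries a
// GetSparseFromOp pseudo-op, the residency boolean is produced through the
// sparseTexelsResidentARB(...) forms below and the pseudo-op is invalidated so it is not
// lowered twice. Callers stub residency to true when sparse textures are unsupported.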
| 140 | } // Anonymous namespace | ||
| 141 | |||
| 142 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 143 | std::string_view coords, std::string_view bias_lc, | ||
| 144 | const IR::Value& offset) { | ||
| 145 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 146 | if (info.has_lod_clamp) { | ||
| 147 | throw NotImplementedException("EmitImageSampleImplicitLod Lod clamp samples"); | ||
| 148 | } | ||
| 149 | const auto texture{Texture(ctx, info, index)}; | ||
| 150 | const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; | ||
| 151 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 152 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 153 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 154 | if (sparse_inst && !supports_sparse) { | ||
| 155 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 156 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 157 | } | ||
| 158 | if (!sparse_inst || !supports_sparse) { | ||
| 159 | if (!offset.IsEmpty()) { | ||
| 160 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 161 | if (ctx.stage == Stage::Fragment) { | ||
| 162 | ctx.Add("{}=textureOffset({},{},{}{});", texel, texture, coords, offset_str, bias); | ||
| 163 | } else { | ||
| 164 | ctx.Add("{}=textureLodOffset({},{},0.0,{});", texel, texture, coords, offset_str); | ||
| 165 | } | ||
| 166 | } else { | ||
| 167 | if (ctx.stage == Stage::Fragment) { | ||
| 168 | ctx.Add("{}=texture({},{}{});", texel, texture, coords, bias); | ||
| 169 | } else { | ||
| 170 | ctx.Add("{}=textureLod({},{},0.0);", texel, texture, coords); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | if (!offset.IsEmpty()) { | ||
| 176 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureOffsetARB({},{},{},{}{}));", | ||
| 177 | *sparse_inst, texture, coords, GetOffsetVec(ctx, offset), texel, bias); | ||
| 178 | } else { | ||
| 179 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureARB({},{},{}{}));", *sparse_inst, | ||
| 180 | texture, coords, texel, bias); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 185 | std::string_view coords, std::string_view lod_lc, | ||
| 186 | const IR::Value& offset) { | ||
| 187 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 188 | if (info.has_bias) { | ||
| 189 | throw NotImplementedException("EmitImageSampleExplicitLod Bias texture samples"); | ||
| 190 | } | ||
| 191 | if (info.has_lod_clamp) { | ||
| 192 | throw NotImplementedException("EmitImageSampleExplicitLod Lod clamp samples"); | ||
| 193 | } | ||
| 194 | const auto texture{Texture(ctx, info, index)}; | ||
| 195 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 196 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 197 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 198 | if (sparse_inst && !supports_sparse) { | ||
| 199 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 200 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 201 | } | ||
| 202 | if (!sparse_inst || !supports_sparse) { | ||
| 203 | if (!offset.IsEmpty()) { | ||
| 204 | ctx.Add("{}=textureLodOffset({},{},{},{});", texel, texture, coords, lod_lc, | ||
| 205 | GetOffsetVec(ctx, offset)); | ||
| 206 | } else { | ||
| 207 | ctx.Add("{}=textureLod({},{},{});", texel, texture, coords, lod_lc); | ||
| 208 | } | ||
| 209 | return; | ||
| 210 | } | ||
| 211 | if (!offset.IsEmpty()) { | ||
| 212 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", | ||
| 213 | *sparse_inst, texture, CastToIntVec(coords, info), lod_lc, | ||
| 214 | GetOffsetVec(ctx, offset), texel); | ||
| 215 | } else { | ||
| 216 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureLodARB({},{},{},{}));", *sparse_inst, | ||
| 217 | texture, coords, lod_lc, texel); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 222 | std::string_view coords, std::string_view dref, | ||
| 223 | std::string_view bias_lc, const IR::Value& offset) { | ||
| 224 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 225 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 226 | if (sparse_inst) { | ||
| 227 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Sparse texture samples"); | ||
| 228 | } | ||
| 229 | if (info.has_bias) { | ||
| 230 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Bias texture samples"); | ||
| 231 | } | ||
| 232 | if (info.has_lod_clamp) { | ||
| 233 | throw NotImplementedException("EmitImageSampleDrefImplicitLod Lod clamp samples"); | ||
| 234 | } | ||
| 235 | const auto texture{Texture(ctx, info, index)}; | ||
| 236 | const auto bias{info.has_bias ? fmt::format(",{}", bias_lc) : ""}; | ||
| 237 | const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; | ||
| 238 | const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; | ||
| 239 | const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && | ||
| 240 | ctx.stage != Stage::Fragment && needs_shadow_ext}; | ||
| 241 | if (use_grad) { | ||
| 242 | LOG_WARNING(Shader_GLSL, | ||
| 243 | "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); | ||
| 244 | if (info.type == TextureType::ColorArrayCube) { | ||
| 245 | LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); | ||
| 246 | ctx.AddF32("{}=0.0f;", inst); | ||
| 247 | return; | ||
| 248 | } | ||
| 249 | const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; | ||
| 250 | ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, | ||
| 251 | d_cast, d_cast); | ||
| 252 | return; | ||
| 253 | } | ||
| 254 | if (!offset.IsEmpty()) { | ||
| 255 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 256 | if (ctx.stage == Stage::Fragment) { | ||
| 257 | ctx.AddF32("{}=textureOffset({},{}({},{}),{}{});", inst, texture, cast, coords, dref, | ||
| 258 | offset_str, bias); | ||
| 259 | } else { | ||
| 260 | ctx.AddF32("{}=textureLodOffset({},{}({},{}),0.0,{});", inst, texture, cast, coords, | ||
| 261 | dref, offset_str); | ||
| 262 | } | ||
| 263 | } else { | ||
| 264 | if (ctx.stage == Stage::Fragment) { | ||
| 265 | if (info.type == TextureType::ColorArrayCube) { | ||
| 266 | ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref); | ||
| 267 | } else { | ||
| 268 | ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias); | ||
| 269 | } | ||
| 270 | } else { | ||
| 271 | ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref); | ||
| 272 | } | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 277 | std::string_view coords, std::string_view dref, | ||
| 278 | std::string_view lod_lc, const IR::Value& offset) { | ||
| 279 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 280 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 281 | if (sparse_inst) { | ||
| 282 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Sparse texture samples"); | ||
| 283 | } | ||
| 284 | if (info.has_bias) { | ||
| 285 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Bias texture samples"); | ||
| 286 | } | ||
| 287 | if (info.has_lod_clamp) { | ||
| 288 | throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples"); | ||
| 289 | } | ||
| 290 | const auto texture{Texture(ctx, info, index)}; | ||
| 291 | const bool needs_shadow_ext{NeedsShadowLodExt(info.type)}; | ||
| 292 | const bool use_grad{!ctx.profile.support_gl_texture_shadow_lod && needs_shadow_ext}; | ||
| 293 | const auto cast{needs_shadow_ext ? "vec4" : "vec3"}; | ||
| 294 | if (use_grad) { | ||
| 295 | LOG_WARNING(Shader_GLSL, | ||
| 296 | "Device lacks GL_EXT_texture_shadow_lod. Using textureGrad fallback"); | ||
| 297 | if (info.type == TextureType::ColorArrayCube) { | ||
| 298 | LOG_WARNING(Shader_GLSL, "textureGrad does not support ColorArrayCube. Stubbing"); | ||
| 299 | ctx.AddF32("{}=0.0f;", inst); | ||
| 300 | return; | ||
| 301 | } | ||
| 302 | const auto d_cast{info.type == TextureType::ColorArray2D ? "vec2" : "vec3"}; | ||
| 303 | ctx.AddF32("{}=textureGrad({},{}({},{}),{}(0),{}(0));", inst, texture, cast, coords, dref, | ||
| 304 | d_cast, d_cast); | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | if (!offset.IsEmpty()) { | ||
| 308 | const auto offset_str{GetOffsetVec(ctx, offset)}; | ||
| 309 | if (info.type == TextureType::ColorArrayCube) { | ||
| 310 | ctx.AddF32("{}=textureLodOffset({},{},{},{},{});", inst, texture, coords, dref, lod_lc, | ||
| 311 | offset_str); | ||
| 312 | } else { | ||
| 313 | ctx.AddF32("{}=textureLodOffset({},{}({},{}),{},{});", inst, texture, cast, coords, | ||
| 314 | dref, lod_lc, offset_str); | ||
| 315 | } | ||
| 316 | } else { | ||
| 317 | if (info.type == TextureType::ColorArrayCube) { | ||
| 318 | ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc); | ||
| 319 | } else { | ||
| 320 | ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref, | ||
| 321 | lod_lc); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 326 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 327 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2) { | ||
| 328 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 329 | const auto texture{Texture(ctx, info, index)}; | ||
| 330 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 331 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 332 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 333 | if (sparse_inst && !supports_sparse) { | ||
| 334 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 335 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 336 | } | ||
| 337 | if (!sparse_inst || !supports_sparse) { | ||
| 338 | if (offset.IsEmpty()) { | ||
| 339 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, | ||
| 340 | info.gather_component); | ||
| 341 | return; | ||
| 342 | } | ||
| 343 | if (offset2.IsEmpty()) { | ||
| 344 | ctx.Add("{}=textureGatherOffset({},{},{},int({}));", texel, texture, coords, | ||
| 345 | GetOffsetVec(ctx, offset), info.gather_component); | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | // PTP | ||
| 349 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 350 | ctx.Add("{}=textureGatherOffsets({},{},{},int({}));", texel, texture, coords, offsets, | ||
| 351 | info.gather_component); | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | if (offset.IsEmpty()) { | ||
| 355 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},int({})));", | ||
| 356 | *sparse_inst, texture, coords, texel, info.gather_component); | ||
| 357 | return; | ||
| 358 | } | ||
| 359 | if (offset2.IsEmpty()) { | ||
| 360 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},int({})));", | ||
| 361 | *sparse_inst, texture, CastToIntVec(coords, info), GetOffsetVec(ctx, offset), | ||
| 362 | texel, info.gather_component); | ||
| 363 | return; | ||
| 364 | } | ||
| 365 | // PTP | ||
| 366 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 367 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},int({})));", | ||
| 368 | *sparse_inst, texture, CastToIntVec(coords, info), offsets, texel, | ||
| 369 | info.gather_component); | ||
| 370 | } | ||
| 371 | |||
| 372 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 373 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2, | ||
| 374 | std::string_view dref) { | ||
| 375 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 376 | const auto texture{Texture(ctx, info, index)}; | ||
| 377 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 378 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 379 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 380 | if (sparse_inst && !supports_sparse) { | ||
| 381 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 382 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 383 | } | ||
| 384 | if (!sparse_inst || !supports_sparse) { | ||
| 385 | if (offset.IsEmpty()) { | ||
| 386 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); | ||
| 387 | return; | ||
| 388 | } | ||
| 389 | if (offset2.IsEmpty()) { | ||
| 390 | ctx.Add("{}=textureGatherOffset({},{},{},{});", texel, texture, coords, dref, | ||
| 391 | GetOffsetVec(ctx, offset)); | ||
| 392 | return; | ||
| 393 | } | ||
| 394 | // PTP | ||
| 395 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 396 | ctx.Add("{}=textureGatherOffsets({},{},{},{});", texel, texture, coords, dref, offsets); | ||
| 397 | return; | ||
| 398 | } | ||
| 399 | if (offset.IsEmpty()) { | ||
| 400 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherARB({},{},{},{}));", *sparse_inst, | ||
| 401 | texture, coords, dref, texel); | ||
| 402 | return; | ||
| 403 | } | ||
| 404 | if (offset2.IsEmpty()) { | ||
| 405 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetARB({},{},{},{},{}));", | ||
| 406 | *sparse_inst, texture, CastToIntVec(coords, info), dref, | ||
| 407 | GetOffsetVec(ctx, offset), texel); | ||
| 408 | return; | ||
| 409 | } | ||
| 410 | // PTP | ||
| 411 | const auto offsets{PtpOffsets(offset, offset2)}; | ||
| 412 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTextureGatherOffsetsARB({},{},{},{},{}));", | ||
| 413 | *sparse_inst, texture, CastToIntVec(coords, info), dref, offsets, texel); | ||
| 414 | } | ||
| 415 | |||
| 416 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 417 | std::string_view coords, std::string_view offset, std::string_view lod, | ||
| 418 | [[maybe_unused]] std::string_view ms) { | ||
| 419 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 420 | if (info.has_bias) { | ||
| 421 | throw NotImplementedException("EmitImageFetch Bias texture samples"); | ||
| 422 | } | ||
| 423 | if (info.has_lod_clamp) { | ||
| 424 | throw NotImplementedException("EmitImageFetch Lod clamp samples"); | ||
| 425 | } | ||
| 426 | const auto texture{Texture(ctx, info, index)}; | ||
| 427 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 428 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 429 | const bool supports_sparse{ctx.profile.support_gl_sparse_textures}; | ||
| 430 | if (sparse_inst && !supports_sparse) { | ||
| 431 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | ||
| 432 | ctx.AddU1("{}=true;", *sparse_inst); | ||
| 433 | } | ||
| 434 | if (!sparse_inst || !supports_sparse) { | ||
| 435 | if (!offset.empty()) { | ||
| 436 | ctx.Add("{}=texelFetchOffset({},{},int({}),{});", texel, texture, | ||
| 437 | CoordsCastToInt(coords, info), lod, CoordsCastToInt(offset, info)); | ||
| 438 | } else { | ||
| 439 | if (info.type == TextureType::Buffer) { | ||
| 440 | ctx.Add("{}=texelFetch({},int({}));", texel, texture, coords); | ||
| 441 | } else { | ||
| 442 | ctx.Add("{}=texelFetch({},{},int({}));", texel, texture, | ||
| 443 | CoordsCastToInt(coords, info), lod); | ||
| 444 | } | ||
| 445 | } | ||
| 446 | return; | ||
| 447 | } | ||
| 448 | if (!offset.empty()) { | ||
| 449 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchOffsetARB({},{},int({}),{},{}));", | ||
| 450 | *sparse_inst, texture, CastToIntVec(coords, info), lod, | ||
| 451 | CastToIntVec(offset, info), texel); | ||
| 452 | } else { | ||
| 453 | ctx.AddU1("{}=sparseTexelsResidentARB(sparseTexelFetchARB({},{},int({}),{}));", | ||
| 454 | *sparse_inst, texture, CastToIntVec(coords, info), lod, texel); | ||
| 455 | } | ||
| 456 | } | ||
| 457 | |||
| 458 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 459 | std::string_view lod) { | ||
| 460 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 461 | const auto texture{Texture(ctx, info, index)}; | ||
| 462 | switch (info.type) { | ||
| 463 | case TextureType::Color1D: | ||
| 464 | return ctx.AddU32x4( | ||
| 465 | "{}=uvec4(uint(textureSize({},int({}))),0u,0u,uint(textureQueryLevels({})));", inst, | ||
| 466 | texture, lod, texture); | ||
| 467 | case TextureType::ColorArray1D: | ||
| 468 | case TextureType::Color2D: | ||
| 469 | case TextureType::ColorCube: | ||
| 470 | return ctx.AddU32x4( | ||
| 471 | "{}=uvec4(uvec2(textureSize({},int({}))),0u,uint(textureQueryLevels({})));", inst, | ||
| 472 | texture, lod, texture); | ||
| 473 | case TextureType::ColorArray2D: | ||
| 474 | case TextureType::Color3D: | ||
| 475 | case TextureType::ColorArrayCube: | ||
| 476 | return ctx.AddU32x4( | ||
| 477 | "{}=uvec4(uvec3(textureSize({},int({}))),uint(textureQueryLevels({})));", inst, texture, | ||
| 478 | lod, texture); | ||
| 479 | case TextureType::Buffer: | ||
| 480 | throw NotImplementedException("EmitImageQueryDimensions Texture buffers"); | ||
| 481 | } | ||
| 482 | throw LogicError("Unspecified image type {}", info.type.Value()); | ||
| 483 | } | ||
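// Resulting layout for the Color2D case, with hypothetical names:
//
//     u32x4_0 = uvec4(uvec2(textureSize(tex0, int(lod))), 0u, uint(textureQueryLevels(tex0)));
//
// i.e. width/height in .xy, zero-filled .z, and the mip level count in .w.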
| 484 | |||
| 485 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 486 | std::string_view coords) { | ||
| 487 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 488 | const auto texture{Texture(ctx, info, index)}; | ||
| 489 | return ctx.AddF32x4("{}=vec4(textureQueryLod({},{}),0.0,0.0);", inst, texture, coords); | ||
| 490 | } | ||
| 491 | |||
| 492 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 493 | std::string_view coords, const IR::Value& derivatives, | ||
| 494 | const IR::Value& offset, [[maybe_unused]] const IR::Value& lod_clamp) { | ||
| 495 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 496 | if (info.has_lod_clamp) { | ||
| 497 | throw NotImplementedException("EmitImageGradient Lod clamp samples"); | ||
| 498 | } | ||
| 499 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 500 | if (sparse_inst) { | ||
| 501 | throw NotImplementedException("EmitImageGradient Sparse"); | ||
| 502 | } | ||
| 503 | if (!offset.IsEmpty()) { | ||
| 504 | throw NotImplementedException("EmitImageGradient offset"); | ||
| 505 | } | ||
| 506 | const auto texture{Texture(ctx, info, index)}; | ||
| 507 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | ||
| 508 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||
| 509 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | ||
| 510 | if (multi_component) { | ||
| 511 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, | ||
| 512 | derivatives_vec, derivatives_vec); | ||
| 513 | } else { | ||
| 514 | ctx.Add("{}=textureGrad({},{},float({}.x),float({}.y));", texel, texture, coords, | ||
| 515 | derivatives_vec, derivatives_vec); | ||
| 516 | } | ||
| 517 | } | ||
| 518 | |||
| 519 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 520 | std::string_view coords) { | ||
| 521 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 522 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 523 | if (sparse_inst) { | ||
| 524 | throw NotImplementedException("EmitImageRead Sparse"); | ||
| 525 | } | ||
| 526 | const auto image{Image(ctx, info, index)}; | ||
| 527 | ctx.AddU32x4("{}=uvec4(imageLoad({},{}));", inst, image, CoordsCastToInt(coords, info)); | ||
| 528 | } | ||
| 529 | |||
| 530 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 531 | std::string_view coords, std::string_view color) { | ||
| 532 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 533 | const auto image{Image(ctx, info, index)}; | ||
| 534 | ctx.Add("imageStore({},{},{});", image, CoordsCastToInt(coords, info), color); | ||
| 535 | } | ||
| 536 | |||
| 537 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 538 | std::string_view coords, std::string_view value) { | ||
| 539 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 540 | const auto image{Image(ctx, info, index)}; | ||
| 541 | ctx.AddU32("{}=imageAtomicAdd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 542 | } | ||
| 543 | |||
| 544 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 545 | std::string_view coords, std::string_view value) { | ||
| 546 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 547 | const auto image{Image(ctx, info, index)}; | ||
| 548 | ctx.AddU32("{}=imageAtomicMin({},{},int({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 549 | value); | ||
| 550 | } | ||
| 551 | |||
| 552 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 553 | std::string_view coords, std::string_view value) { | ||
| 554 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 555 | const auto image{Image(ctx, info, index)}; | ||
| 556 | ctx.AddU32("{}=imageAtomicMin({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 557 | value); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 561 | std::string_view coords, std::string_view value) { | ||
| 562 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 563 | const auto image{Image(ctx, info, index)}; | ||
| 564 | ctx.AddU32("{}=imageAtomicMax({},{},int({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 565 | value); | ||
| 566 | } | ||
| 567 | |||
| 568 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 569 | std::string_view coords, std::string_view value) { | ||
| 570 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 571 | const auto image{Image(ctx, info, index)}; | ||
| 572 | ctx.AddU32("{}=imageAtomicMax({},{},uint({}));", inst, image, CoordsCastToInt(coords, info), | ||
| 573 | value); | ||
| 574 | } | ||
| 575 | |||
| 576 | void EmitImageAtomicInc32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, | ||
| 577 | std::string_view) { | ||
| 578 | NotImplemented(); | ||
| 579 | } | ||
| 580 | |||
| 581 | void EmitImageAtomicDec32(EmitContext&, IR::Inst&, const IR::Value&, std::string_view, | ||
| 582 | std::string_view) { | ||
| 583 | NotImplemented(); | ||
| 584 | } | ||
| 585 | |||
| 586 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 587 | std::string_view coords, std::string_view value) { | ||
| 588 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 589 | const auto image{Image(ctx, info, index)}; | ||
| 590 | ctx.AddU32("{}=imageAtomicAnd({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 591 | } | ||
| 592 | |||
| 593 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 594 | std::string_view coords, std::string_view value) { | ||
| 595 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 596 | const auto image{Image(ctx, info, index)}; | ||
| 597 | ctx.AddU32("{}=imageAtomicOr({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 598 | } | ||
| 599 | |||
| 600 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 601 | std::string_view coords, std::string_view value) { | ||
| 602 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 603 | const auto image{Image(ctx, info, index)}; | ||
| 604 | ctx.AddU32("{}=imageAtomicXor({},{},{});", inst, image, CoordsCastToInt(coords, info), value); | ||
| 605 | } | ||
| 606 | |||
| 607 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 608 | std::string_view coords, std::string_view value) { | ||
| 609 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 610 | const auto image{Image(ctx, info, index)}; | ||
| 611 | ctx.AddU32("{}=imageAtomicExchange({},{},{});", inst, image, CoordsCastToInt(coords, info), | ||
| 612 | value); | ||
| 613 | } | ||
| 614 | |||
| 615 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 616 | NotImplemented(); | ||
| 617 | } | ||
| 618 | |||
| 619 | void EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 620 | NotImplemented(); | ||
| 621 | } | ||
| 622 | |||
| 623 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 624 | NotImplemented(); | ||
| 625 | } | ||
| 626 | |||
| 627 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 628 | NotImplemented(); | ||
| 629 | } | ||
| 630 | |||
| 631 | void EmitBindlessImageGather(EmitContext&) { | ||
| 632 | NotImplemented(); | ||
| 633 | } | ||
| 634 | |||
| 635 | void EmitBindlessImageGatherDref(EmitContext&) { | ||
| 636 | NotImplemented(); | ||
| 637 | } | ||
| 638 | |||
| 639 | void EmitBindlessImageFetch(EmitContext&) { | ||
| 640 | NotImplemented(); | ||
| 641 | } | ||
| 642 | |||
| 643 | void EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 644 | NotImplemented(); | ||
| 645 | } | ||
| 646 | |||
| 647 | void EmitBindlessImageQueryLod(EmitContext&) { | ||
| 648 | NotImplemented(); | ||
| 649 | } | ||
| 650 | |||
| 651 | void EmitBindlessImageGradient(EmitContext&) { | ||
| 652 | NotImplemented(); | ||
| 653 | } | ||
| 654 | |||
| 655 | void EmitBindlessImageRead(EmitContext&) { | ||
| 656 | NotImplemented(); | ||
| 657 | } | ||
| 658 | |||
| 659 | void EmitBindlessImageWrite(EmitContext&) { | ||
| 660 | NotImplemented(); | ||
| 661 | } | ||
| 662 | |||
| 663 | void EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 664 | NotImplemented(); | ||
| 665 | } | ||
| 666 | |||
| 667 | void EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 668 | NotImplemented(); | ||
| 669 | } | ||
| 670 | |||
| 671 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 672 | NotImplemented(); | ||
| 673 | } | ||
| 674 | |||
| 675 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 676 | NotImplemented(); | ||
| 677 | } | ||
| 678 | |||
| 679 | void EmitBoundImageGather(EmitContext&) { | ||
| 680 | NotImplemented(); | ||
| 681 | } | ||
| 682 | |||
| 683 | void EmitBoundImageGatherDref(EmitContext&) { | ||
| 684 | NotImplemented(); | ||
| 685 | } | ||
| 686 | |||
| 687 | void EmitBoundImageFetch(EmitContext&) { | ||
| 688 | NotImplemented(); | ||
| 689 | } | ||
| 690 | |||
| 691 | void EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 692 | NotImplemented(); | ||
| 693 | } | ||
| 694 | |||
| 695 | void EmitBoundImageQueryLod(EmitContext&) { | ||
| 696 | NotImplemented(); | ||
| 697 | } | ||
| 698 | |||
| 699 | void EmitBoundImageGradient(EmitContext&) { | ||
| 700 | NotImplemented(); | ||
| 701 | } | ||
| 702 | |||
| 703 | void EmitBoundImageRead(EmitContext&) { | ||
| 704 | NotImplemented(); | ||
| 705 | } | ||
| 706 | |||
| 707 | void EmitBoundImageWrite(EmitContext&) { | ||
| 708 | NotImplemented(); | ||
| 709 | } | ||
| 710 | |||
| 711 | void EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 712 | NotImplemented(); | ||
| 713 | } | ||
| 714 | |||
| 715 | void EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 716 | NotImplemented(); | ||
| 717 | } | ||
| 718 | |||
| 719 | void EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 720 | NotImplemented(); | ||
| 721 | } | ||
| 722 | |||
| 723 | void EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 724 | NotImplemented(); | ||
| 725 | } | ||
| 726 | |||
| 727 | void EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 728 | NotImplemented(); | ||
| 729 | } | ||
| 730 | |||
| 731 | void EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 732 | NotImplemented(); | ||
| 733 | } | ||
| 734 | |||
| 735 | void EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 736 | NotImplemented(); | ||
| 737 | } | ||
| 738 | |||
| 739 | void EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 740 | NotImplemented(); | ||
| 741 | } | ||
| 742 | |||
| 743 | void EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 744 | NotImplemented(); | ||
| 745 | } | ||
| 746 | |||
| 747 | void EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 748 | NotImplemented(); | ||
| 749 | } | ||
| 750 | |||
| 751 | void EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 752 | NotImplemented(); | ||
| 753 | } | ||
| 754 | |||
| 755 | void EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 756 | NotImplemented(); | ||
| 757 | } | ||
| 758 | |||
| 759 | void EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 760 | NotImplemented(); | ||
| 761 | } | ||
| 762 | |||
| 763 | void EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 764 | NotImplemented(); | ||
| 765 | } | ||
| 766 | |||
| 767 | void EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 768 | NotImplemented(); | ||
| 769 | } | ||
| 770 | |||
| 771 | void EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 772 | NotImplemented(); | ||
| 773 | } | ||
| 774 | |||
| 775 | void EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 776 | NotImplemented(); | ||
| 777 | } | ||
| 778 | |||
| 779 | void EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 780 | NotImplemented(); | ||
| 781 | } | ||
| 782 | |||
| 783 | void EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 784 | NotImplemented(); | ||
| 785 | } | ||
| 786 | |||
| 787 | void EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 788 | NotImplemented(); | ||
| 789 | } | ||
| 790 | |||
| 791 | void EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 792 | NotImplemented(); | ||
| 793 | } | ||
| 794 | |||
| 795 | void EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 796 | NotImplemented(); | ||
| 797 | } | ||
| 798 | |||
| 799 | } // namespace Shader::Backend::GLSL | ||
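Every Bindless* and Bound* entry point in this file is a stub that throws. A plausible reading is that an earlier recompiler pass rewrites bindless and bound image accesses into the indexed Image* forms before the backend runs, so these variants should never be reached, and the throw makes any gap loud rather than silently miscompiling. A minimal sketch of how a caller could surface that failure, assuming the NotImplementedException declared in shader_recompiler/exception.h (its interface is inferred here from the NotImplemented() macro's fmt-style usage, not spelled out in this hunk):

#include <cstdio>

#include "shader_recompiler/exception.h"

void TranslateGuarded() {
    try {
        // ... run the GLSL backend; a stray bindless/bound op would throw here ...
        throw Shader::NotImplementedException("GLSL instruction {}", "EmitBindlessImageFetch");
    } catch (const Shader::NotImplementedException& e) {
        std::fprintf(stderr, "unimplemented GLSL instruction: %s\n", e.what());
    }
}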
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h new file mode 100644 index 000000000..5936d086f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h | |||
| @@ -0,0 +1,702 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string_view> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | enum class Attribute : u64; | ||
| 13 | enum class Patch : u64; | ||
| 14 | class Inst; | ||
| 15 | class Value; | ||
| 16 | } // namespace Shader::IR | ||
| 17 | |||
| 18 | namespace Shader::Backend::GLSL { | ||
| 19 | class EmitContext; | ||
| 20 | |||
| 21 | #define NotImplemented() throw NotImplementedException("GLSL instruction {}", __func__) | ||
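// Defined as a function-like macro rather than a helper function so that
// __func__, which expands at the call site, names the emitter that invoked
// it; the "{}" is an fmt-style placeholder the exception's constructor fills
// with that name.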
| 22 | |||
| 23 | // Microinstruction emitters | ||
| 24 | void EmitPhi(EmitContext& ctx, IR::Inst& inst); | ||
| 25 | void EmitVoid(EmitContext& ctx); | ||
| 26 | void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 27 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 28 | void EmitReference(EmitContext& ctx, const IR::Value& value); | ||
| 29 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); | ||
| 30 | void EmitJoin(EmitContext& ctx); | ||
| 31 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 32 | void EmitBarrier(EmitContext& ctx); | ||
| 33 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 34 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 35 | void EmitPrologue(EmitContext& ctx); | ||
| 36 | void EmitEpilogue(EmitContext& ctx); | ||
| 37 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); | ||
| 38 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 39 | void EmitGetRegister(EmitContext& ctx); | ||
| 40 | void EmitSetRegister(EmitContext& ctx); | ||
| 41 | void EmitGetPred(EmitContext& ctx); | ||
| 42 | void EmitSetPred(EmitContext& ctx); | ||
| 43 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 44 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 45 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 46 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 47 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 48 | const IR::Value& offset); | ||
| 49 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 50 | const IR::Value& offset); | ||
| 51 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 52 | const IR::Value& offset); | ||
| 53 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 54 | const IR::Value& offset); | ||
| 55 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 56 | const IR::Value& offset); | ||
| 57 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 58 | const IR::Value& offset); | ||
| 59 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 60 | const IR::Value& offset); | ||
| 61 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, | ||
| 62 | std::string_view vertex); | ||
| 63 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view value, | ||
| 64 | std::string_view vertex); | ||
| 65 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, std::string_view offset, | ||
| 66 | std::string_view vertex); | ||
| 67 | void EmitSetAttributeIndexed(EmitContext& ctx, std::string_view offset, std::string_view value, | ||
| 68 | std::string_view vertex); | ||
| 69 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); | ||
| 70 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, std::string_view value); | ||
| 71 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_view value); | ||
| 72 | void EmitSetSampleMask(EmitContext& ctx, std::string_view value); | ||
| 73 | void EmitSetFragDepth(EmitContext& ctx, std::string_view value); | ||
| 74 | void EmitGetZFlag(EmitContext& ctx); | ||
| 75 | void EmitGetSFlag(EmitContext& ctx); | ||
| 76 | void EmitGetCFlag(EmitContext& ctx); | ||
| 77 | void EmitGetOFlag(EmitContext& ctx); | ||
| 78 | void EmitSetZFlag(EmitContext& ctx); | ||
| 79 | void EmitSetSFlag(EmitContext& ctx); | ||
| 80 | void EmitSetCFlag(EmitContext& ctx); | ||
| 81 | void EmitSetOFlag(EmitContext& ctx); | ||
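// A signature convention appears to run through this header: emitters that
// receive instruction operands emit real GLSL, while declarations taking only
// EmitContext& (the register/predicate/goto accessors and the condition-code
// flag accessors above, and the Bindless*/Bound*/Global* groups below) are
// placeholders whose definitions presumably just raise NotImplemented(),
// because the IR they correspond to is either lowered away before this
// backend runs or not yet supported.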
| 82 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); | ||
| 83 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 84 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 85 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | ||
| 86 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | ||
| 87 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | ||
| 88 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, std::string_view word_offset); | ||
| 89 | void EmitWriteLocal(EmitContext& ctx, std::string_view word_offset, std::string_view value); | ||
| 90 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | ||
| 91 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); | ||
| 92 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); | ||
| 93 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); | ||
| 94 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); | ||
| 95 | void EmitLoadGlobalU8(EmitContext& ctx); | ||
| 96 | void EmitLoadGlobalS8(EmitContext& ctx); | ||
| 97 | void EmitLoadGlobalU16(EmitContext& ctx); | ||
| 98 | void EmitLoadGlobalS16(EmitContext& ctx); | ||
| 99 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 100 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 101 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address); | ||
| 102 | void EmitWriteGlobalU8(EmitContext& ctx); | ||
| 103 | void EmitWriteGlobalS8(EmitContext& ctx); | ||
| 104 | void EmitWriteGlobalU16(EmitContext& ctx); | ||
| 105 | void EmitWriteGlobalS16(EmitContext& ctx); | ||
| 106 | void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 107 | void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 108 | void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); | ||
| 109 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 110 | const IR::Value& offset); | ||
| 111 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 112 | const IR::Value& offset); | ||
| 113 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 114 | const IR::Value& offset); | ||
| 115 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 116 | const IR::Value& offset); | ||
| 117 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 118 | const IR::Value& offset); | ||
| 119 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 120 | const IR::Value& offset); | ||
| 121 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 122 | const IR::Value& offset); | ||
| 123 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 124 | std::string_view value); | ||
| 125 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 126 | std::string_view value); | ||
| 127 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 128 | std::string_view value); | ||
| 129 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 130 | std::string_view value); | ||
| 131 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 132 | std::string_view value); | ||
| 133 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 134 | std::string_view value); | ||
| 135 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 136 | std::string_view value); | ||
| 137 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 138 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 139 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 140 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 141 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 142 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 143 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset); | ||
| 144 | void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 145 | void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 146 | void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 147 | void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
| 148 | void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value); | ||
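// The shared-memory loads and stores above take a string offset rather than
// an IR::Value binding, which fits an implementation where workgroup-shared
// memory is declared once as a GLSL "shared" array and the emitter turns the
// byte offset into an element index (an inference from the signatures; the
// definitions live in the backend's shared-memory translation unit, not in
// this hunk).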
| 149 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 150 | std::string_view e2); | ||
| 151 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 152 | std::string_view e2, std::string_view e3); | ||
| 153 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 154 | std::string_view e2, std::string_view e3, std::string_view e4); | ||
| 155 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 156 | u32 index); | ||
| 157 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 158 | u32 index); | ||
| 159 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 160 | u32 index); | ||
| 161 | void EmitCompositeInsertU32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 162 | std::string_view object, u32 index); | ||
| 163 | void EmitCompositeInsertU32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 164 | std::string_view object, u32 index); | ||
| 165 | void EmitCompositeInsertU32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 166 | std::string_view object, u32 index); | ||
| 167 | void EmitCompositeConstructF16x2(EmitContext& ctx, std::string_view e1, std::string_view e2); | ||
| 168 | void EmitCompositeConstructF16x3(EmitContext& ctx, std::string_view e1, std::string_view e2, | ||
| 169 | std::string_view e3); | ||
| 170 | void EmitCompositeConstructF16x4(EmitContext& ctx, std::string_view e1, std::string_view e2, | ||
| 171 | std::string_view e3, std::string_view e4); | ||
| 172 | void EmitCompositeExtractF16x2(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 173 | void EmitCompositeExtractF16x3(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 174 | void EmitCompositeExtractF16x4(EmitContext& ctx, std::string_view composite, u32 index); | ||
| 175 | void EmitCompositeInsertF16x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 176 | u32 index); | ||
| 177 | void EmitCompositeInsertF16x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 178 | u32 index); | ||
| 179 | void EmitCompositeInsertF16x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 180 | u32 index); | ||
| 181 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 182 | std::string_view e2); | ||
| 183 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 184 | std::string_view e2, std::string_view e3); | ||
| 185 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view e1, | ||
| 186 | std::string_view e2, std::string_view e3, std::string_view e4); | ||
| 187 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 188 | u32 index); | ||
| 189 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 190 | u32 index); | ||
| 191 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 192 | u32 index); | ||
| 193 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 194 | std::string_view object, u32 index); | ||
| 195 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 196 | std::string_view object, u32 index); | ||
| 197 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, std::string_view composite, | ||
| 198 | std::string_view object, u32 index); | ||
| 199 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 200 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 201 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 202 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 203 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 204 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 205 | void EmitCompositeInsertF64x2(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 206 | u32 index); | ||
| 207 | void EmitCompositeInsertF64x3(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 208 | u32 index); | ||
| 209 | void EmitCompositeInsertF64x4(EmitContext& ctx, std::string_view composite, std::string_view object, | ||
| 210 | u32 index); | ||
| 211 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 212 | std::string_view true_value, std::string_view false_value); | ||
| 213 | void EmitSelectU8(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 214 | std::string_view false_value); | ||
| 215 | void EmitSelectU16(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 216 | std::string_view false_value); | ||
| 217 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 218 | std::string_view true_value, std::string_view false_value); | ||
| 219 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 220 | std::string_view true_value, std::string_view false_value); | ||
| 221 | void EmitSelectF16(EmitContext& ctx, std::string_view cond, std::string_view true_value, | ||
| 222 | std::string_view false_value); | ||
| 223 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 224 | std::string_view true_value, std::string_view false_value); | ||
| 225 | void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 226 | std::string_view true_value, std::string_view false_value); | ||
| 227 | void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst); | ||
| 228 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 229 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 230 | void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst); | ||
| 231 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 232 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 233 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 234 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 235 | void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 236 | void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 237 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 238 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 239 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 240 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
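// The Pack/Unpack pairs above presumably map straight onto the GLSL built-ins
// of the same names (packHalf2x16/unpackHalf2x16, packDouble2x32/
// unpackDouble2x32). PackFloat2x16, which reinterprets two f16 values as one
// u32 without a float32 conversion, has no single built-in and likely needs
// bit-level emulation.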
| 241 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 242 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 243 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 244 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 245 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 246 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 247 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 248 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 249 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 250 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 251 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 252 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 253 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 254 | std::string_view c); | ||
| 255 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 256 | std::string_view c); | ||
| 257 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 258 | std::string_view c); | ||
| 259 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 260 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 261 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 262 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 263 | void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 264 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 265 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 266 | void EmitFPNeg16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 267 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 268 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 269 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 270 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 271 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 272 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 273 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 274 | void EmitFPRecip64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 275 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 276 | void EmitFPRecipSqrt64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 277 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 278 | void EmitFPSaturate16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 279 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 280 | void EmitFPSaturate64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 281 | void EmitFPClamp16(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 282 | std::string_view min_value, std::string_view max_value); | ||
| 283 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 284 | std::string_view min_value, std::string_view max_value); | ||
| 285 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 286 | std::string_view min_value, std::string_view max_value); | ||
| 287 | void EmitFPRoundEven16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 288 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 289 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 290 | void EmitFPFloor16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 291 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 292 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 293 | void EmitFPCeil16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 294 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 295 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 296 | void EmitFPTrunc16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 297 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 298 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 299 | void EmitFPOrdEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 300 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 301 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 302 | void EmitFPUnordEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 303 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 304 | std::string_view rhs); | ||
| 305 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 306 | std::string_view rhs); | ||
| 307 | void EmitFPOrdNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 308 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 309 | std::string_view rhs); | ||
| 310 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 311 | std::string_view rhs); | ||
| 312 | void EmitFPUnordNotEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 313 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 314 | std::string_view rhs); | ||
| 315 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 316 | std::string_view rhs); | ||
| 317 | void EmitFPOrdLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 318 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 319 | std::string_view rhs); | ||
| 320 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 321 | std::string_view rhs); | ||
| 322 | void EmitFPUnordLessThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 323 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 324 | std::string_view rhs); | ||
| 325 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 326 | std::string_view rhs); | ||
| 327 | void EmitFPOrdGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 328 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 329 | std::string_view rhs); | ||
| 330 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 331 | std::string_view rhs); | ||
| 332 | void EmitFPUnordGreaterThan16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 333 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 334 | std::string_view rhs); | ||
| 335 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 336 | std::string_view rhs); | ||
| 337 | void EmitFPOrdLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 338 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 339 | std::string_view rhs); | ||
| 340 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 341 | std::string_view rhs); | ||
| 342 | void EmitFPUnordLessThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 343 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 344 | std::string_view rhs); | ||
| 345 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 346 | std::string_view rhs); | ||
| 347 | void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 348 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 349 | std::string_view rhs); | ||
| 350 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 351 | std::string_view rhs); | ||
| 352 | void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, std::string_view lhs, std::string_view rhs); | ||
| 353 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 354 | std::string_view rhs); | ||
| 355 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 356 | std::string_view rhs); | ||
| 357 | void EmitFPIsNan16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 358 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 359 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 360 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 361 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 362 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 363 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 364 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 365 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 366 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 367 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 368 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 369 | std::string_view shift); | ||
| 370 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 371 | std::string_view shift); | ||
| 372 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 373 | std::string_view shift); | ||
| 374 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 375 | std::string_view shift); | ||
| 376 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 377 | std::string_view shift); | ||
| 378 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 379 | std::string_view shift); | ||
| 380 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 381 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 382 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 383 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 384 | std::string_view insert, std::string_view offset, std::string_view count); | ||
| 385 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 386 | std::string_view offset, std::string_view count); | ||
| 387 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 388 | std::string_view offset, std::string_view count); | ||
| 389 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 390 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 391 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 392 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 393 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 394 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 395 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 396 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 397 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 398 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 399 | std::string_view max); | ||
| 400 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 401 | std::string_view max); | ||
| 402 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 403 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 404 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 405 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 406 | std::string_view rhs); | ||
| 407 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 408 | std::string_view rhs); | ||
| 409 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 410 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 411 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs); | ||
| 412 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 413 | std::string_view rhs); | ||
| 414 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 415 | std::string_view rhs); | ||
| 416 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 417 | std::string_view value); | ||
| 418 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 419 | std::string_view value); | ||
| 420 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 421 | std::string_view value); | ||
| 422 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 423 | std::string_view value); | ||
| 424 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 425 | std::string_view value); | ||
| 426 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 427 | std::string_view value); | ||
| 428 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 429 | std::string_view value); | ||
| 430 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 431 | std::string_view value); | ||
| 432 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 433 | std::string_view value); | ||
| 434 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 435 | std::string_view value); | ||
| 436 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 437 | std::string_view value); | ||
| 438 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, std::string_view pointer_offset, | ||
| 439 | std::string_view value); | ||
| 440 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 441 | const IR::Value& offset, std::string_view value); | ||
| 442 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 443 | const IR::Value& offset, std::string_view value); | ||
| 444 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 445 | const IR::Value& offset, std::string_view value); | ||
| 446 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 447 | const IR::Value& offset, std::string_view value); | ||
| 448 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 449 | const IR::Value& offset, std::string_view value); | ||
| 450 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 451 | const IR::Value& offset, std::string_view value); | ||
| 452 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 453 | const IR::Value& offset, std::string_view value); | ||
| 454 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 455 | const IR::Value& offset, std::string_view value); | ||
| 456 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 457 | const IR::Value& offset, std::string_view value); | ||
| 458 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 459 | const IR::Value& offset, std::string_view value); | ||
| 460 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 461 | const IR::Value& offset, std::string_view value); | ||
| 462 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 463 | const IR::Value& offset, std::string_view value); | ||
| 464 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 465 | const IR::Value& offset, std::string_view value); | ||
| 466 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 467 | const IR::Value& offset, std::string_view value); | ||
| 468 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 469 | const IR::Value& offset, std::string_view value); | ||
| 470 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 471 | const IR::Value& offset, std::string_view value); | ||
| 472 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 473 | const IR::Value& offset, std::string_view value); | ||
| 474 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 475 | const IR::Value& offset, std::string_view value); | ||
| 476 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 477 | const IR::Value& offset, std::string_view value); | ||
| 478 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 479 | const IR::Value& offset, std::string_view value); | ||
| 480 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 481 | const IR::Value& offset, std::string_view value); | ||
| 482 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 483 | const IR::Value& offset, std::string_view value); | ||
| 484 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 485 | const IR::Value& offset, std::string_view value); | ||
| 486 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 487 | const IR::Value& offset, std::string_view value); | ||
| 488 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 489 | const IR::Value& offset, std::string_view value); | ||
| 490 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 491 | const IR::Value& offset, std::string_view value); | ||
| 492 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 493 | const IR::Value& offset, std::string_view value); | ||
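// Core GLSL's atomic* built-ins cover the 32-bit integer forms directly, but
// the packed-float storage atomics above (AddF16x2, MinF32x2, and so on) have
// no native counterpart, so an implementation would have to emulate them. A
// common pattern is a compare-and-swap loop, sketched here as the GLSL an
// emitter might generate (hypothetical names; F16x2 min shown):
//
//     uint seen = ssbo0[idx];
//     for (;;) {
//         uint expected = seen;
//         uint desired = packHalf2x16(min(unpackHalf2x16(expected),
//                                         unpackHalf2x16(in_value)));
//         seen = atomicCompSwap(ssbo0[idx], expected, desired);
//         if (seen == expected) break;  // the swap took effect
//     }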
| 494 | void EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 495 | void EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 496 | void EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 497 | void EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 498 | void EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 499 | void EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 500 | void EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 501 | void EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 502 | void EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 503 | void EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 504 | void EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 505 | void EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 506 | void EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 507 | void EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 508 | void EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 509 | void EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 510 | void EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 511 | void EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 512 | void EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 513 | void EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 514 | void EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 515 | void EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 516 | void EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 517 | void EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 518 | void EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 519 | void EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 520 | void EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 521 | void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 522 | void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
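// The Global* atomics (raw 64-bit pointer addressing) again take only
// EmitContext&, matching the stub signature convention noted earlier: global
// memory accesses are presumably rewritten to storage-buffer accesses by an
// earlier pass, so these should be unreachable in this backend.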
| 523 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 524 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 525 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b); | ||
| 526 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 527 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 528 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 529 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 530 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 531 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 532 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 533 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 534 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 535 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 536 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 537 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 538 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 539 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 540 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 541 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 542 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 543 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 544 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 545 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 546 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 547 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 548 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 549 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 550 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 551 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 552 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 553 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 554 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 555 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 556 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 557 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 558 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 559 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 560 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 561 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 562 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 563 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 564 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 565 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 566 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 567 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 568 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 569 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 570 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 571 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 572 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 573 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 574 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); | ||
| 575 | void EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 576 | void EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 577 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 578 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 579 | void EmitBindlessImageGather(EmitContext&); | ||
| 580 | void EmitBindlessImageGatherDref(EmitContext&); | ||
| 581 | void EmitBindlessImageFetch(EmitContext&); | ||
| 582 | void EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 583 | void EmitBindlessImageQueryLod(EmitContext&); | ||
| 584 | void EmitBindlessImageGradient(EmitContext&); | ||
| 585 | void EmitBindlessImageRead(EmitContext&); | ||
| 586 | void EmitBindlessImageWrite(EmitContext&); | ||
| 587 | void EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 588 | void EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 589 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 590 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 591 | void EmitBoundImageGather(EmitContext&); | ||
| 592 | void EmitBoundImageGatherDref(EmitContext&); | ||
| 593 | void EmitBoundImageFetch(EmitContext&); | ||
| 594 | void EmitBoundImageQueryDimensions(EmitContext&); | ||
| 595 | void EmitBoundImageQueryLod(EmitContext&); | ||
| 596 | void EmitBoundImageGradient(EmitContext&); | ||
| 597 | void EmitBoundImageRead(EmitContext&); | ||
| 598 | void EmitBoundImageWrite(EmitContext&); | ||
| 599 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 600 | std::string_view coords, std::string_view bias_lc, | ||
| 601 | const IR::Value& offset); | ||
| 602 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 603 | std::string_view coords, std::string_view lod_lc, | ||
| 604 | const IR::Value& offset); | ||
| 605 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 606 | std::string_view coords, std::string_view dref, | ||
| 607 | std::string_view bias_lc, const IR::Value& offset); | ||
| 608 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 609 | std::string_view coords, std::string_view dref, | ||
| 610 | std::string_view lod_lc, const IR::Value& offset); | ||
| 611 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 612 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2); | ||
| 613 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 614 | std::string_view coords, const IR::Value& offset, const IR::Value& offset2, | ||
| 615 | std::string_view dref); | ||
| 616 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 617 | std::string_view coords, std::string_view offset, std::string_view lod, | ||
| 618 | std::string_view ms); | ||
| 619 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 620 | std::string_view lod); | ||
| 621 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 622 | std::string_view coords); | ||
| 623 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 624 | std::string_view coords, const IR::Value& derivatives, | ||
| 625 | const IR::Value& offset, const IR::Value& lod_clamp); | ||
| 626 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 627 | std::string_view coords); | ||
| 628 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 629 | std::string_view coords, std::string_view color); | ||
| 630 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 631 | void EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 632 | void EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 633 | void EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 634 | void EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 635 | void EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 636 | void EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 637 | void EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 638 | void EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 639 | void EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 640 | void EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 641 | void EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 642 | void EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 643 | void EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 644 | void EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 645 | void EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 646 | void EmitBoundImageAtomicInc32(EmitContext&); | ||
| 647 | void EmitBoundImageAtomicDec32(EmitContext&); | ||
| 648 | void EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 649 | void EmitBoundImageAtomicOr32(EmitContext&); | ||
| 650 | void EmitBoundImageAtomicXor32(EmitContext&); | ||
| 651 | void EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 652 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 653 | std::string_view coords, std::string_view value); | ||
| 654 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 655 | std::string_view coords, std::string_view value); | ||
| 656 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 657 | std::string_view coords, std::string_view value); | ||
| 658 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 659 | std::string_view coords, std::string_view value); | ||
| 660 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 661 | std::string_view coords, std::string_view value); | ||
| 662 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 663 | std::string_view coords, std::string_view value); | ||
| 664 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 665 | std::string_view coords, std::string_view value); | ||
| 666 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 667 | std::string_view coords, std::string_view value); | ||
| 668 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 669 | std::string_view coords, std::string_view value); | ||
| 670 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 671 | std::string_view coords, std::string_view value); | ||
| 672 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 673 | std::string_view coords, std::string_view value); | ||
| 674 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst); | ||
| 675 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 676 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 677 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 678 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred); | ||
| 679 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); | ||
| 680 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 681 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 682 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 683 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 684 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 685 | std::string_view index, std::string_view clamp, | ||
| 686 | std::string_view segmentation_mask); | ||
| 687 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||
| 688 | std::string_view clamp, std::string_view segmentation_mask); | ||
| 689 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 690 | std::string_view index, std::string_view clamp, | ||
| 691 | std::string_view segmentation_mask); | ||
| 692 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 693 | std::string_view index, std::string_view clamp, | ||
| 694 | std::string_view segmentation_mask); | ||
| 695 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, | ||
| 696 | std::string_view swizzle); | ||
| 697 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 698 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 699 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 700 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a); | ||
| 701 | |||
| 702 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp new file mode 100644 index 000000000..38419f88f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | void SetZeroFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { | ||
| 14 | IR::Inst* const zero{inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; | ||
| 15 | if (!zero) { | ||
| 16 | return; | ||
| 17 | } | ||
| 18 | ctx.AddU1("{}={}==0;", *zero, result); | ||
| 19 | zero->Invalidate(); | ||
| 20 | } | ||
| 21 | |||
| 22 | void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { | ||
| 23 | IR::Inst* const sign{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; | ||
| 24 | if (!sign) { | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | ctx.AddU1("{}=int({})<0;", *sign, result); | ||
| 28 | sign->Invalidate(); | ||
| 29 | } | ||
| 30 | |||
| 31 | void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b, | ||
| 32 | char lop) { | ||
| 33 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 34 | ctx.Add("{}={}{}{};", result, a, lop, b); | ||
| 35 | SetZeroFlag(ctx, inst, result); | ||
| 36 | SetSignFlag(ctx, inst, result); | ||
| 37 | } | ||
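| | // For illustration, with hypothetical operands a="u_1", b="u_2" and lop='&', the | ||
| | // helper above emits GLSL along the lines of: | ||
| | //   u_3=u_1&u_2; | ||
| | //   b_4=u_3==0;      // zero CC, only when GetZeroFromOp is consumed | ||
| | //   b_5=int(u_3)<0;  // sign CC, only when GetSignFromOp is consumed | ||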
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 40 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 41 | // Compute the overflow CC first as it requires the original operand values, | ||
| 42 | // which may be overwritten by the result of the addition | ||
| 43 | if (IR::Inst * overflow{inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { | ||
| 44 | // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c | ||
| 45 | constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; | ||
| 46 | const auto sub_a{fmt::format("{}u-{}", s32_max, a)}; | ||
| 47 | const auto positive_result{fmt::format("int({})>int({})", b, sub_a)}; | ||
| 48 | const auto negative_result{fmt::format("int({})<int({})", b, sub_a)}; | ||
| 49 | ctx.AddU1("{}=int({})>=0?{}:{};", *overflow, a, positive_result, negative_result); | ||
| 50 | overflow->Invalidate(); | ||
| 51 | } | ||
| 52 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 53 | if (IR::Inst* const carry{inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { | ||
| 54 | ctx.uses_cc_carry = true; | ||
| 55 | ctx.Add("{}=uaddCarry({},{},carry);", result, a, b); | ||
| 56 | ctx.AddU1("{}=carry!=0;", *carry); | ||
| 57 | carry->Invalidate(); | ||
| 58 | } else { | ||
| 59 | ctx.Add("{}={}+{};", result, a, b); | ||
| 60 | } | ||
| 61 | SetZeroFlag(ctx, inst, result); | ||
| 62 | SetSignFlag(ctx, inst, result); | ||
| 63 | } | ||
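| | // Worked example with hypothetical values: for a=0x7fffffffu and b=1u, sub_a | ||
| | // evaluates to 0u, the positive branch tests int(1)>int(0), and the overflow CC is | ||
| | // true while the unsigned sum wraps to 0x80000000u. When a carry CC is requested, | ||
| | // uaddCarry() stores 0 or 1 into the shader-local "carry" variable that the context | ||
| | // is expected to declare once uses_cc_carry is set. | ||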
| 64 | |||
| 65 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 66 | ctx.AddU64("{}={}+{};", inst, a, b); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 70 | ctx.AddU32("{}={}-{};", inst, a, b); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 74 | ctx.AddU64("{}={}-{};", inst, a, b); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 78 | ctx.AddU32("{}=uint({}*{});", inst, a, b); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 82 | ctx.AddU32("{}=uint(-({}));", inst, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 86 | ctx.AddU64("{}=-({});", inst, value); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 90 | ctx.AddU32("{}=abs(int({}));", inst, value); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 94 | std::string_view shift) { | ||
| 95 | ctx.AddU32("{}={}<<{};", inst, base, shift); | ||
| 96 | } | ||
| 97 | |||
| 98 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 99 | std::string_view shift) { | ||
| 100 | ctx.AddU64("{}={}<<{};", inst, base, shift); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 104 | std::string_view shift) { | ||
| 105 | ctx.AddU32("{}={}>>{};", inst, base, shift); | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 109 | std::string_view shift) { | ||
| 110 | ctx.AddU64("{}={}>>{};", inst, base, shift); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 114 | std::string_view shift) { | ||
| 115 | ctx.AddU32("{}=int({})>>{};", inst, base, shift); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 119 | std::string_view shift) { | ||
| 120 | ctx.AddU64("{}=int64_t({})>>{};", inst, base, shift); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 124 | BitwiseLogicalOp(ctx, inst, a, b, '&'); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 128 | BitwiseLogicalOp(ctx, inst, a, b, '|'); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 132 | BitwiseLogicalOp(ctx, inst, a, b, '^'); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 136 | std::string_view insert, std::string_view offset, std::string_view count) { | ||
| 137 | ctx.AddU32("{}=bitfieldInsert({},{},int({}),int({}));", inst, base, insert, offset, count); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 141 | std::string_view offset, std::string_view count) { | ||
| 142 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 143 | ctx.Add("{}=uint(bitfieldExtract(int({}),int({}),int({})));", result, base, offset, count); | ||
| 144 | SetZeroFlag(ctx, inst, result); | ||
| 145 | SetSignFlag(ctx, inst, result); | ||
| 146 | } | ||
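| | // For illustration: bitfieldExtract on a signed operand sign-extends, so with | ||
| | // base=0x0000ff00u, offset=8 and count=8 this variant yields 0xffffffffu (-1), | ||
| | // whereas the unsigned variant below yields 0x000000ffu. | ||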
| 147 | |||
| 148 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||
| 149 | std::string_view offset, std::string_view count) { | ||
| 150 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 151 | ctx.Add("{}=uint(bitfieldExtract(uint({}),int({}),int({})));", result, base, offset, count); | ||
| 152 | SetZeroFlag(ctx, inst, result); | ||
| 153 | SetSignFlag(ctx, inst, result); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 157 | ctx.AddU32("{}=bitfieldReverse({});", inst, value); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 161 | ctx.AddU32("{}=bitCount({});", inst, value); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 165 | ctx.AddU32("{}=~{};", inst, value); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 169 | ctx.AddU32("{}=findMSB(int({}));", inst, value); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 173 | ctx.AddU32("{}=findMSB(uint({}));", inst, value); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 177 | ctx.AddU32("{}=min(int({}),int({}));", inst, a, b); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 181 | ctx.AddU32("{}=min(uint({}),uint({}));", inst, a, b); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 185 | ctx.AddU32("{}=max(int({}),int({}));", inst, a, b); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 189 | ctx.AddU32("{}=max(uint({}),uint({}));", inst, a, b); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 193 | std::string_view max) { | ||
| 194 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 195 | ctx.Add("{}=clamp(int({}),int({}),int({}));", result, value, min, max); | ||
| 196 | SetZeroFlag(ctx, inst, result); | ||
| 197 | SetSignFlag(ctx, inst, result); | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view min, | ||
| 201 | std::string_view max) { | ||
| 202 | const auto result{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | ||
| 203 | ctx.Add("{}=clamp(uint({}),uint({}),uint({}));", result, value, min, max); | ||
| 204 | SetZeroFlag(ctx, inst, result); | ||
| 205 | SetSignFlag(ctx, inst, result); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 209 | ctx.AddU1("{}=int({})<int({});", inst, lhs, rhs); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 213 | ctx.AddU1("{}=uint({})<uint({});", inst, lhs, rhs); | ||
| 214 | } | ||
| 215 | |||
| 216 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 217 | ctx.AddU1("{}={}=={};", inst, lhs, rhs); | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 221 | std::string_view rhs) { | ||
| 222 | ctx.AddU1("{}=int({})<=int({});", inst, lhs, rhs); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 226 | std::string_view rhs) { | ||
| 227 | ctx.AddU1("{}=uint({})<=uint({});", inst, lhs, rhs); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 231 | std::string_view rhs) { | ||
| 232 | ctx.AddU1("{}=int({})>int({});", inst, lhs, rhs); | ||
| 233 | } | ||
| 234 | |||
| 235 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 236 | std::string_view rhs) { | ||
| 237 | ctx.AddU1("{}=uint({})>uint({});", inst, lhs, rhs); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, std::string_view rhs) { | ||
| 241 | ctx.AddU1("{}={}!={};", inst, lhs, rhs); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 245 | std::string_view rhs) { | ||
| 246 | ctx.AddU1("{}=int({})>=int({});", inst, lhs, rhs); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, std::string_view lhs, | ||
| 250 | std::string_view rhs) { | ||
| 251 | ctx.AddU1("{}=uint({})>=uint({});", inst, lhs, rhs); | ||
| 252 | } | ||
| 253 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp new file mode 100644 index 000000000..338ff4bd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_logical.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | |||
| 13 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 14 | ctx.AddU1("{}={}||{};", inst, a, b); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 18 | ctx.AddU1("{}={}&&{};", inst, a, b); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||
| 22 | ctx.AddU1("{}={}^^{};", inst, a, b); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, std::string_view value) { | ||
| 26 | ctx.AddU1("{}=!{};", inst, value); | ||
| 27 | } | ||
| 28 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp new file mode 100644 index 000000000..e3957491f --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp | |||
| @@ -0,0 +1,202 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " | ||
| 15 | "cas_result=atomicCompSwap({},old_value,bitfieldInsert(old_value,{},{},{}));" | ||
| 16 | "if(cas_result==old_value){{break;}}}}"}; | ||
| 17 | |||
| 18 | void SsboWriteCas(EmitContext& ctx, const IR::Value& binding, std::string_view offset_var, | ||
| 19 | std::string_view value, std::string_view bit_offset, u32 num_bits) { | ||
| 20 | const auto ssbo{fmt::format("{}_ssbo{}[{}>>2]", ctx.stage_name, binding.U32(), offset_var)}; | ||
| 21 | ctx.Add(cas_loop, ssbo, ssbo, value, bit_offset, num_bits); | ||
| 22 | } | ||
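| | // For illustration, a byte-sized store through a hypothetical binding "fs_ssbo0" at | ||
| | // offset variable u_1 with value u_2 expands to roughly: | ||
| | //   for(;;){uint old_value=fs_ssbo0[u_1>>2]; | ||
| | //     uint cas_result=atomicCompSwap(fs_ssbo0[u_1>>2],old_value, | ||
| | //                                    bitfieldInsert(old_value,u_2,int(u_1%4)*8,8)); | ||
| | //     if(cas_result==old_value){break;}} | ||
| | // Inserting into old_value, the snapshot the compare validates, keeps the | ||
| | // read-modify-write atomic even if neighboring bytes change between iterations. | ||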
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void EmitLoadGlobalU8(EmitContext&) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitLoadGlobalS8(EmitContext&) { | ||
| 30 | NotImplemented(); | ||
| 31 | } | ||
| 32 | |||
| 33 | void EmitLoadGlobalU16(EmitContext&) { | ||
| 34 | NotImplemented(); | ||
| 35 | } | ||
| 36 | |||
| 37 | void EmitLoadGlobalS16(EmitContext&) { | ||
| 38 | NotImplemented(); | ||
| 39 | } | ||
| 40 | |||
| 41 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 42 | if (ctx.profile.support_int64) { | ||
| 43 | return ctx.AddU32("{}=LoadGlobal32({});", inst, address); | ||
| 44 | } | ||
| 45 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 46 | ctx.AddU32("{}=0u;", inst); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 50 | if (ctx.profile.support_int64) { | ||
| 51 | return ctx.AddU32x2("{}=LoadGlobal64({});", inst, address); | ||
| 52 | } | ||
| 53 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 54 | ctx.AddU32x2("{}=uvec2(0);", inst); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) { | ||
| 58 | if (ctx.profile.support_int64) { | ||
| 59 | return ctx.AddU32x4("{}=LoadGlobal128({});", inst, address); | ||
| 60 | } | ||
| 61 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 62 | ctx.AddU32x4("{}=uvec4(0);", inst); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitWriteGlobalU8(EmitContext&) { | ||
| 66 | NotImplemented(); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitWriteGlobalS8(EmitContext&) { | ||
| 70 | NotImplemented(); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitWriteGlobalU16(EmitContext&) { | ||
| 74 | NotImplemented(); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitWriteGlobalS16(EmitContext&) { | ||
| 78 | NotImplemented(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 82 | if (ctx.profile.support_int64) { | ||
| 83 | return ctx.Add("WriteGlobal32({},{});", address, value); | ||
| 84 | } | ||
| 85 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 89 | if (ctx.profile.support_int64) { | ||
| 90 | return ctx.Add("WriteGlobal64({},{});", address, value); | ||
| 91 | } | ||
| 92 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) { | ||
| 96 | if (ctx.profile.support_int64) { | ||
| 97 | return ctx.Add("WriteGlobal128({},{});", address, value); | ||
| 98 | } | ||
| 99 | LOG_WARNING(Shader_GLSL, "Int64 not supported, ignoring memory operation"); | ||
| 100 | } | ||
| 101 | |||
| 102 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 103 | const IR::Value& offset) { | ||
| 104 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 105 | ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int({}%4)*8,8);", inst, ctx.stage_name, | ||
| 106 | binding.U32(), offset_var, offset_var); | ||
| 107 | } | ||
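| | // For illustration: SSBOs are exposed as arrays of 32-bit words, so a byte load at | ||
| | // offset 7 reads word 7>>2==1 and extracts the 8 bits starting at int(7%4)*8==24. | ||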
| 108 | |||
| 109 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 110 | const IR::Value& offset) { | ||
| 111 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 112 | ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int({}%4)*8,8);", inst, ctx.stage_name, | ||
| 113 | binding.U32(), offset_var, offset_var); | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 117 | const IR::Value& offset) { | ||
| 118 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 119 | ctx.AddU32("{}=bitfieldExtract({}_ssbo{}[{}>>2],int(({}>>1)%2)*16,16);", inst, ctx.stage_name, | ||
| 120 | binding.U32(), offset_var, offset_var); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 124 | const IR::Value& offset) { | ||
| 125 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 126 | ctx.AddU32("{}=bitfieldExtract(int({}_ssbo{}[{}>>2]),int(({}>>1)%2)*16,16);", inst, | ||
| 127 | ctx.stage_name, binding.U32(), offset_var, offset_var); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 131 | const IR::Value& offset) { | ||
| 132 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 133 | ctx.AddU32("{}={}_ssbo{}[{}>>2];", inst, ctx.stage_name, binding.U32(), offset_var); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 137 | const IR::Value& offset) { | ||
| 138 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 139 | ctx.AddU32x2("{}=uvec2({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2]);", inst, ctx.stage_name, | ||
| 140 | binding.U32(), offset_var, ctx.stage_name, binding.U32(), offset_var); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 144 | const IR::Value& offset) { | ||
| 145 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 146 | ctx.AddU32x4("{}=uvec4({}_ssbo{}[{}>>2],{}_ssbo{}[({}+4)>>2],{}_ssbo{}[({}+8)>>2],{}_ssbo{}[({}" | ||
| 147 | "+12)>>2]);", | ||
| 148 | inst, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, binding.U32(), | ||
| 149 | offset_var, ctx.stage_name, binding.U32(), offset_var, ctx.stage_name, | ||
| 150 | binding.U32(), offset_var); | ||
| 151 | } | ||
| 152 | |||
| 153 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 154 | std::string_view value) { | ||
| 155 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 156 | const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; | ||
| 157 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 161 | std::string_view value) { | ||
| 162 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 163 | const auto bit_offset{fmt::format("int({}%4)*8", offset_var)}; | ||
| 164 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 8); | ||
| 165 | } | ||
| 166 | |||
| 167 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 168 | std::string_view value) { | ||
| 169 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 170 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; | ||
| 171 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 175 | std::string_view value) { | ||
| 176 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 177 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset_var)}; | ||
| 178 | SsboWriteCas(ctx, binding, offset_var, value, bit_offset, 16); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 182 | std::string_view value) { | ||
| 183 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 184 | ctx.Add("{}_ssbo{}[{}>>2]={};", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 188 | std::string_view value) { | ||
| 189 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 190 | ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 191 | ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 195 | std::string_view value) { | ||
| 196 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||
| 197 | ctx.Add("{}_ssbo{}[{}>>2]={}.x;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 198 | ctx.Add("{}_ssbo{}[({}+4)>>2]={}.y;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 199 | ctx.Add("{}_ssbo{}[({}+8)>>2]={}.z;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 200 | ctx.Add("{}_ssbo{}[({}+12)>>2]={}.w;", ctx.stage_name, binding.U32(), offset_var, value); | ||
| 201 | } | ||
| 202 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp new file mode 100644 index 000000000..f420fe388 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | #ifdef _MSC_VER | ||
| 12 | #pragma warning(disable : 4100) | ||
| 13 | #endif | ||
| 14 | |||
| 15 | namespace Shader::Backend::GLSL { | ||
| 16 | |||
| 17 | void EmitGetRegister(EmitContext& ctx) { | ||
| 18 | NotImplemented(); | ||
| 19 | } | ||
| 20 | |||
| 21 | void EmitSetRegister(EmitContext& ctx) { | ||
| 22 | NotImplemented(); | ||
| 23 | } | ||
| 24 | |||
| 25 | void EmitGetPred(EmitContext& ctx) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitSetPred(EmitContext& ctx) { | ||
| 30 | NotImplemented(); | ||
| 31 | } | ||
| 32 | |||
| 33 | void EmitSetGotoVariable(EmitContext& ctx) { | ||
| 34 | NotImplemented(); | ||
| 35 | } | ||
| 36 | |||
| 37 | void EmitGetGotoVariable(EmitContext& ctx) { | ||
| 38 | NotImplemented(); | ||
| 39 | } | ||
| 40 | |||
| 41 | void EmitSetIndirectBranchVariable(EmitContext& ctx) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitGetIndirectBranchVariable(EmitContext& ctx) { | ||
| 46 | NotImplemented(); | ||
| 47 | } | ||
| 48 | |||
| 49 | void EmitGetZFlag(EmitContext& ctx) { | ||
| 50 | NotImplemented(); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitGetSFlag(EmitContext& ctx) { | ||
| 54 | NotImplemented(); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitGetCFlag(EmitContext& ctx) { | ||
| 58 | NotImplemented(); | ||
| 59 | } | ||
| 60 | |||
| 61 | void EmitGetOFlag(EmitContext& ctx) { | ||
| 62 | NotImplemented(); | ||
| 63 | } | ||
| 64 | |||
| 65 | void EmitSetZFlag(EmitContext& ctx) { | ||
| 66 | NotImplemented(); | ||
| 67 | } | ||
| 68 | |||
| 69 | void EmitSetSFlag(EmitContext& ctx) { | ||
| 70 | NotImplemented(); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitSetCFlag(EmitContext& ctx) { | ||
| 74 | NotImplemented(); | ||
| 75 | } | ||
| 76 | |||
| 77 | void EmitSetOFlag(EmitContext& ctx) { | ||
| 78 | NotImplemented(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitGetZeroFromOp(EmitContext& ctx) { | ||
| 82 | NotImplemented(); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitGetSignFromOp(EmitContext& ctx) { | ||
| 86 | NotImplemented(); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitGetCarryFromOp(EmitContext& ctx) { | ||
| 90 | NotImplemented(); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitGetOverflowFromOp(EmitContext& ctx) { | ||
| 94 | NotImplemented(); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitGetSparseFromOp(EmitContext& ctx) { | ||
| 98 | NotImplemented(); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitGetInBoundsFromOp(EmitContext& ctx) { | ||
| 102 | NotImplemented(); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp new file mode 100644 index 000000000..49fba9073 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_select.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 13 | std::string_view true_value, std::string_view false_value) { | ||
| 14 | ctx.AddU1("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 18 | [[maybe_unused]] std::string_view true_value, | ||
| 19 | [[maybe_unused]] std::string_view false_value) { | ||
| 20 | NotImplemented(); | ||
| 21 | } | ||
| 22 | |||
| 23 | void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 24 | [[maybe_unused]] std::string_view true_value, | ||
| 25 | [[maybe_unused]] std::string_view false_value) { | ||
| 26 | NotImplemented(); | ||
| 27 | } | ||
| 28 | |||
| 29 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 30 | std::string_view true_value, std::string_view false_value) { | ||
| 31 | ctx.AddU32("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 32 | } | ||
| 33 | |||
| 34 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 35 | std::string_view true_value, std::string_view false_value) { | ||
| 36 | ctx.AddU64("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view cond, | ||
| 40 | [[maybe_unused]] std::string_view true_value, | ||
| 41 | [[maybe_unused]] std::string_view false_value) { | ||
| 42 | NotImplemented(); | ||
| 43 | } | ||
| 44 | |||
| 45 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 46 | std::string_view true_value, std::string_view false_value) { | ||
| 47 | ctx.AddF32("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 48 | } | ||
| 49 | |||
| 50 | void EmitSelectF64(EmitContext& ctx, IR::Inst& inst, std::string_view cond, | ||
| 51 | std::string_view true_value, std::string_view false_value) { | ||
| 52 | ctx.AddF64("{}={}?{}:{};", inst, cond, true_value, false_value); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp new file mode 100644 index 000000000..518b78f06 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_shared_memory.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | constexpr char cas_loop[]{"for(;;){{uint old_value={};uint " | ||
| 14 | "cas_result=atomicCompSwap({},old_value,bitfieldInsert(old_value,{},{},{}));" | ||
| 15 | "if(cas_result==old_value){{break;}}}}"}; | ||
| 16 | |||
| 17 | void SharedWriteCas(EmitContext& ctx, std::string_view offset, std::string_view value, | ||
| 18 | std::string_view bit_offset, u32 num_bits) { | ||
| 19 | const auto smem{fmt::format("smem[{}>>2]", offset)}; | ||
| 20 | ctx.Add(cas_loop, smem, smem, value, bit_offset, num_bits); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 25 | ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int({}%4)*8,8);", inst, offset, offset); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 29 | ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int({}%4)*8,8);", inst, offset, offset); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 33 | ctx.AddU32("{}=bitfieldExtract(smem[{}>>2],int(({}>>1)%2)*16,16);", inst, offset, offset); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 37 | ctx.AddU32("{}=bitfieldExtract(int(smem[{}>>2]),int(({}>>1)%2)*16,16);", inst, offset, offset); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 41 | ctx.AddU32("{}=smem[{}>>2];", inst, offset); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 45 | ctx.AddU32x2("{}=uvec2(smem[{}>>2],smem[({}+4)>>2]);", inst, offset, offset); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, std::string_view offset) { | ||
| 49 | ctx.AddU32x4("{}=uvec4(smem[{}>>2],smem[({}+4)>>2],smem[({}+8)>>2],smem[({}+12)>>2]);", inst, | ||
| 50 | offset, offset, offset, offset); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitWriteSharedU8(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 54 | const auto bit_offset{fmt::format("int({}%4)*8", offset)}; | ||
| 55 | SharedWriteCas(ctx, offset, value, bit_offset, 8); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitWriteSharedU16(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 59 | const auto bit_offset{fmt::format("int(({}>>1)%2)*16", offset)}; | ||
| 60 | SharedWriteCas(ctx, offset, value, bit_offset, 16); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitWriteSharedU32(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 64 | ctx.Add("smem[{}>>2]={};", offset, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitWriteSharedU64(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 68 | ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||
| 69 | ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitWriteSharedU128(EmitContext& ctx, std::string_view offset, std::string_view value) { | ||
| 73 | ctx.Add("smem[{}>>2]={}.x;", offset, value); | ||
| 74 | ctx.Add("smem[({}+4)>>2]={}.y;", offset, value); | ||
| 75 | ctx.Add("smem[({}+8)>>2]={}.z;", offset, value); | ||
| 76 | ctx.Add("smem[({}+12)>>2]={}.w;", offset, value); | ||
| 77 | } | ||
| 78 | |||
| 79 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp new file mode 100644 index 000000000..9b866f889 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/profile.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLSL { | ||
| 14 | namespace { | ||
| 15 | std::string_view OutputVertexIndex(EmitContext& ctx) { | ||
| 16 | return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; | ||
| 17 | } | ||
| 18 | |||
| 19 | void InitializeOutputVaryings(EmitContext& ctx) { | ||
| 20 | if (ctx.uses_geometry_passthrough) { | ||
| 21 | return; | ||
| 22 | } | ||
| 23 | if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { | ||
| 24 | ctx.Add("gl_Position=vec4(0,0,0,1);"); | ||
| 25 | } | ||
| 26 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 27 | if (!ctx.info.stores.Generic(index)) { | ||
| 28 | continue; | ||
| 29 | } | ||
| 30 | const auto& info_array{ctx.output_generics.at(index)}; | ||
| 31 | const auto output_decorator{OutputVertexIndex(ctx)}; | ||
| 32 | size_t element{}; | ||
| 33 | while (element < info_array.size()) { | ||
| 34 | const auto& info{info_array.at(element)}; | ||
| 35 | const auto varying_name{fmt::format("{}{}", info.name, output_decorator)}; | ||
| 36 | switch (info.num_components) { | ||
| 37 | case 1: { | ||
| 38 | const char value{element == 3 ? '1' : '0'}; | ||
| 39 | ctx.Add("{}={}.f;", varying_name, value); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | case 2: | ||
| 43 | case 3: | ||
| 44 | if (element + info.num_components < 4) { | ||
| 45 | ctx.Add("{}=vec{}(0);", varying_name, info.num_components); | ||
| 46 | } else { | ||
| 47 | // The last element is the w component; it must be initialized to 1 | ||
| 48 | const auto zeros{info.num_components == 3 ? "0,0," : "0,"}; | ||
| 49 | ctx.Add("{}=vec{}({}1);", varying_name, info.num_components, zeros); | ||
| 50 | } | ||
| 51 | break; | ||
| 52 | case 4: | ||
| 53 | ctx.Add("{}=vec4(0,0,0,1);", varying_name); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | element += info.num_components; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } | ||
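| | // For illustration, with a hypothetical varying "attr": a vec3 write starting at | ||
| | // element 1 covers y, z and the w component, so the else-branch above emits | ||
| | // "attr=vec3(0,0,1);", while a full four-component varying gets "attr=vec4(0,0,0,1);". | ||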
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 65 | const size_t num_args{phi.NumArgs()}; | ||
| 66 | for (size_t i = 0; i < num_args; ++i) { | ||
| 67 | ctx.var_alloc.Consume(phi.Arg(i)); | ||
| 68 | } | ||
| 69 | if (!phi.Definition<Id>().is_valid) { | ||
| 70 | // The phi node wasn't forward defined | ||
| 71 | ctx.var_alloc.PhiDefine(phi, phi.Arg(0).Type()); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitVoid(EmitContext&) {} | ||
| 76 | |||
| 77 | void EmitReference(EmitContext& ctx, const IR::Value& value) { | ||
| 78 | ctx.var_alloc.Consume(value); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { | ||
| 82 | IR::Inst& phi{*phi_value.InstRecursive()}; | ||
| 83 | const auto phi_type{phi.Arg(0).Type()}; | ||
| 84 | if (!phi.Definition<Id>().is_valid) { | ||
| 85 | // The phi node wasn't forward defined | ||
| 86 | ctx.var_alloc.PhiDefine(phi, phi_type); | ||
| 87 | } | ||
| 88 | const auto phi_reg{ctx.var_alloc.Consume(IR::Value{&phi})}; | ||
| 89 | const auto val_reg{ctx.var_alloc.Consume(value)}; | ||
| 90 | if (phi_reg == val_reg) { | ||
| 91 | return; | ||
| 92 | } | ||
| 93 | ctx.Add("{}={};", phi_reg, val_reg); | ||
| 94 | } | ||
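| | // For illustration: phis are lowered to plain copies on each incoming edge, e.g. | ||
| | // "u_7=u_3;" on one predecessor and "u_7=u_5;" on the other (hypothetical names), | ||
| | // so the merge point itself emits no code beyond the shared variable. | ||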
| 95 | |||
| 96 | void EmitPrologue(EmitContext& ctx) { | ||
| 97 | InitializeOutputVaryings(ctx); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitEpilogue(EmitContext&) {} | ||
| 101 | |||
| 102 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||
| 103 | ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); | ||
| 104 | InitializeOutputVaryings(ctx); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 108 | ctx.Add("EndStreamPrimitive(int({}));", ctx.var_alloc.Consume(stream)); | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp new file mode 100644 index 000000000..15bf02dd6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_undefined.cpp | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLSL { | ||
| 11 | |||
| 12 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | ||
| 13 | ctx.AddU1("{}=false;", inst); | ||
| 14 | } | ||
| 15 | |||
| 16 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { | ||
| 17 | ctx.AddU32("{}=0u;", inst); | ||
| 18 | } | ||
| 19 | |||
| 20 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { | ||
| 21 | ctx.AddU32("{}=0u;", inst); | ||
| 22 | } | ||
| 23 | |||
| 24 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { | ||
| 25 | ctx.AddU32("{}=0u;", inst); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { | ||
| 29 | ctx.AddU64("{}=0u;", inst); | ||
| 30 | } | ||
| 31 | |||
| 32 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -0,0 +1,217 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | ||
| 15 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 16 | if (!in_bounds) { | ||
| 17 | return; | ||
| 18 | } | ||
| 19 | ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); | ||
| 20 | in_bounds->Invalidate(); | ||
| 21 | } | ||
| 22 | |||
| 23 | std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { | ||
| 24 | return fmt::format("({}&{})", thread_id, segmentation_mask); | ||
| 25 | } | ||
| 26 | |||
| 27 | std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, | ||
| 28 | std::string_view not_seg_mask) { | ||
| 29 | return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); | ||
| 30 | } | ||
| 31 | |||
| 32 | std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, | ||
| 33 | std::string_view segmentation_mask) { | ||
| 34 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 35 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 36 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); | ||
| 37 | } | ||
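| | // Worked example: for thread_id=13 (0b01101), segmentation_mask=0b11100 and clamp=3, | ||
| | // the minimum is 13&0b11100==12 and the maximum is 12|(3&~0b11100)==15, restricting | ||
| | // shuffle sources to lanes 12..15 of the segment. | ||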
| 38 | |||
| 39 | void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | ||
| 40 | std::string_view value, std::string_view index, | ||
| 41 | [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { | ||
| 42 | const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; | ||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | ||
| 44 | SetInBoundsFlag(ctx, inst); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | ||
| 49 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 53 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 54 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 55 | } else { | ||
| 56 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 57 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 58 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 59 | } | ||
| 60 | } | ||
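| | // For illustration: on a 64-wide host subgroup, uvec2(ballotARB(...)) holds the low | ||
| | // and high 32 lanes, so lane 37 (37>>5==1) selects component .y and compares only | ||
| | // its own 32-lane half against the active mask. Indexing the uvec2 with the raw | ||
| | // invocation id would read out of bounds for any lane above 1. | ||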
| 61 | |||
| 62 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 63 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 64 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | ||
| 65 | } else { | ||
| 66 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB>>5]")}; | ||
| 67 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB>>5]", pred)}; | ||
| 68 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 74 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 75 | } else { | ||
| 76 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB>>5]")}; | ||
| 77 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB>>5]", pred)}; | ||
| 78 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 79 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 85 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | ||
| 86 | } else { | ||
| 87 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 92 | ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 96 | ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 100 | ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 104 | ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 108 | ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 112 | std::string_view index, std::string_view clamp, | ||
| 113 | std::string_view segmentation_mask) { | ||
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 121 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | ||
| 122 | |||
| 123 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | ||
| 124 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | ||
| 125 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 126 | SetInBoundsFlag(ctx, inst); | ||
| 127 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||
| 131 | std::string_view clamp, std::string_view segmentation_mask) { | ||
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | ||
| 139 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | ||
| 140 | SetInBoundsFlag(ctx, inst); | ||
| 141 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 145 | std::string_view index, std::string_view clamp, | ||
| 146 | std::string_view segmentation_mask) { | ||
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | ||
| 154 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 155 | SetInBoundsFlag(ctx, inst); | ||
| 156 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 160 | std::string_view index, std::string_view clamp, | ||
| 161 | std::string_view segmentation_mask) { | ||
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | ||
| 164 | return; | ||
| 165 | } | ||
| 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | ||
| 169 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 170 | SetInBoundsFlag(ctx, inst); | ||
| 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, | ||
| 175 | std::string_view swizzle) { | ||
| 176 | const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; | ||
| 177 | const auto modifier_a{fmt::format("FSWZ_A[{}]", mask)}; | ||
| 178 | const auto modifier_b{fmt::format("FSWZ_B[{}]", mask)}; | ||
| 179 | ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); | ||
| 180 | } | ||
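| | // For illustration: swizzle packs one 2-bit mode per lane of a quad, so lane 2 | ||
| | // shifts by (2&3)<<1==4 and mask==(swizzle>>4)&3, which indexes the FSWZ_A/FSWZ_B | ||
| | // coefficient tables that the backend is expected to declare elsewhere. | ||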
| 181 | |||
| 182 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 183 | if (ctx.profile.support_gl_derivative_control) { | ||
| 184 | ctx.AddF32("{}=dFdxFine({});", inst, op_a); | ||
| 185 | } else { | ||
| 186 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); | ||
| 187 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 192 | if (ctx.profile.support_gl_derivative_control) { | ||
| 193 | ctx.AddF32("{}=dFdyFine({});", inst, op_a); | ||
| 194 | } else { | ||
| 195 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); | ||
| 196 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 201 | if (ctx.profile.support_gl_derivative_control) { | ||
| 202 | ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); | ||
| 203 | } else { | ||
| 204 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); | ||
| 205 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 210 | if (ctx.profile.support_gl_derivative_control) { | ||
| 211 | ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); | ||
| 212 | } else { | ||
| 213 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); | ||
| 214 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.cpp b/src/shader_recompiler/backend/glsl/var_alloc.cpp new file mode 100644 index 000000000..194f926ca --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.cpp | |||
| @@ -0,0 +1,308 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "shader_recompiler/backend/glsl/var_alloc.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLSL { | ||
| 15 | namespace { | ||
| 16 | std::string TypePrefix(GlslVarType type) { | ||
| 17 | switch (type) { | ||
| 18 | case GlslVarType::U1: | ||
| 19 | return "b_"; | ||
| 20 | case GlslVarType::F16x2: | ||
| 21 | return "f16x2_"; | ||
| 22 | case GlslVarType::U32: | ||
| 23 | return "u_"; | ||
| 24 | case GlslVarType::F32: | ||
| 25 | return "f_"; | ||
| 26 | case GlslVarType::U64: | ||
| 27 | return "u64_"; | ||
| 28 | case GlslVarType::F64: | ||
| 29 | return "d_"; | ||
| 30 | case GlslVarType::U32x2: | ||
| 31 | return "u2_"; | ||
| 32 | case GlslVarType::F32x2: | ||
| 33 | return "f2_"; | ||
| 34 | case GlslVarType::U32x3: | ||
| 35 | return "u3_"; | ||
| 36 | case GlslVarType::F32x3: | ||
| 37 | return "f3_"; | ||
| 38 | case GlslVarType::U32x4: | ||
| 39 | return "u4_"; | ||
| 40 | case GlslVarType::F32x4: | ||
| 41 | return "f4_"; | ||
| 42 | case GlslVarType::PrecF32: | ||
| 43 | return "pf_"; | ||
| 44 | case GlslVarType::PrecF64: | ||
| 45 | return "pd_"; | ||
| 46 | case GlslVarType::Void: | ||
| 47 | return ""; | ||
| 48 | default: | ||
| 49 | throw NotImplementedException("Type {}", type); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | std::string FormatFloat(std::string_view value, IR::Type type) { | ||
| 54 | // TODO: Confirm FP64 nan/inf | ||
| 55 | if (type == IR::Type::F32) { | ||
| 56 | if (value == "nan") { | ||
| 57 | return "utof(0x7fc00000)"; | ||
| 58 | } | ||
| 59 | if (value == "inf") { | ||
| 60 | return "utof(0x7f800000)"; | ||
| 61 | } | ||
| 62 | if (value == "-inf") { | ||
| 63 | return "utof(0xff800000)"; | ||
| 64 | } | ||
| 65 | } | ||
| 66 | if (value.find_first_of('e') != std::string_view::npos) { | ||
| 67 | // scientific notation | ||
| 68 | const auto cast{type == IR::Type::F32 ? "float" : "double"}; | ||
| 69 | return fmt::format("{}({})", cast, value); | ||
| 70 | } | ||
| 71 | const bool needs_dot{value.find_first_of('.') == std::string_view::npos}; | ||
| 72 | const bool needs_suffix{!value.ends_with('f')}; | ||
| 73 | const auto suffix{type == IR::Type::F32 ? "f" : "lf"}; | ||
| 74 | return fmt::format("{}{}{}", value, needs_dot ? "." : "", needs_suffix ? suffix : ""); | ||
| 75 | } | ||
| 76 | |||
| 77 | std::string MakeImm(const IR::Value& value) { | ||
| 78 | switch (value.Type()) { | ||
| 79 | case IR::Type::U1: | ||
| 80 | return fmt::format("{}", value.U1() ? "true" : "false"); | ||
| 81 | case IR::Type::U32: | ||
| 82 | return fmt::format("{}u", value.U32()); | ||
| 83 | case IR::Type::F32: | ||
| 84 | return FormatFloat(fmt::format("{}", value.F32()), IR::Type::F32); | ||
| 85 | case IR::Type::U64: | ||
| 86 | return fmt::format("{}ul", value.U64()); | ||
| 87 | case IR::Type::F64: | ||
| 88 | return FormatFloat(fmt::format("{}", value.F64()), IR::Type::F64); | ||
| 89 | case IR::Type::Void: | ||
| 90 | return ""; | ||
| 91 | default: | ||
| 92 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | } // Anonymous namespace | ||
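// A few concrete outputs of the helpers above (a sketch derived from the
// logic, with hypothetical inputs):
//   MakeImm(U1 true)          -> "true"
//   MakeImm(U32 16)           -> "16u"
//   FormatFloat("1", F32)     -> "1.f"           (dot and suffix appended)
//   FormatFloat("0.5", F64)   -> "0.5lf"
//   FormatFloat("1e-07", F32) -> "float(1e-07)"  (scientific notation is cast)
//   FormatFloat("nan", F32)   -> "utof(0x7fc00000)"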
| 96 | |||
| 97 | std::string VarAlloc::Representation(u32 index, GlslVarType type) const { | ||
| 98 | const auto prefix{TypePrefix(type)}; | ||
| 99 | return fmt::format("{}{}", prefix, index); | ||
| 100 | } | ||
| 101 | |||
| 102 | std::string VarAlloc::Representation(Id id) const { | ||
| 103 | return Representation(id.index, id.type); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string VarAlloc::Define(IR::Inst& inst, GlslVarType type) { | ||
| 107 | if (inst.HasUses()) { | ||
| 108 | inst.SetDefinition<Id>(Alloc(type)); | ||
| 109 | return Representation(inst.Definition<Id>()); | ||
| 110 | } else { | ||
| 111 | Id id{}; | ||
| 112 | id.type.Assign(type); | ||
| 113 | GetUseTracker(type).uses_temp = true; | ||
| 114 | inst.SetDefinition<Id>(id); | ||
| 115 | return 't' + Representation(inst.Definition<Id>()); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | std::string VarAlloc::Define(IR::Inst& inst, IR::Type type) { | ||
| 120 | return Define(inst, RegType(type)); | ||
| 121 | } | ||
| 122 | |||
| 123 | std::string VarAlloc::PhiDefine(IR::Inst& inst, IR::Type type) { | ||
| 124 | return AddDefine(inst, RegType(type)); | ||
| 125 | } | ||
| 126 | |||
| 127 | std::string VarAlloc::AddDefine(IR::Inst& inst, GlslVarType type) { | ||
| 128 | if (!inst.HasUses()) { | ||
| 129 | // The result is never consumed, so no variable needs to be allocated | ||
| 130 | return ""; | ||
| 131 | } | ||
| 132 | inst.SetDefinition<Id>(Alloc(type)); | ||
| 133 | return Representation(inst.Definition<Id>()); | ||
| 134 | } | ||
| 136 | |||
| 137 | std::string VarAlloc::Consume(const IR::Value& value) { | ||
| 138 | return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); | ||
| 139 | } | ||
| 140 | |||
| 141 | std::string VarAlloc::ConsumeInst(IR::Inst& inst) { | ||
| 142 | inst.DestructiveRemoveUsage(); | ||
| 143 | if (!inst.HasUses()) { | ||
| 144 | Free(inst.Definition<Id>()); | ||
| 145 | } | ||
| 146 | return Representation(inst.Definition<Id>()); | ||
| 147 | } | ||
| 148 | |||
| 149 | std::string VarAlloc::GetGlslType(IR::Type type) const { | ||
| 150 | return GetGlslType(RegType(type)); | ||
| 151 | } | ||
| 152 | |||
| 153 | Id VarAlloc::Alloc(GlslVarType type) { | ||
| 154 | auto& use_tracker{GetUseTracker(type)}; | ||
| 155 | const auto num_vars{use_tracker.var_use.size()}; | ||
| 156 | for (size_t var = 0; var < num_vars; ++var) { | ||
| 157 | if (use_tracker.var_use[var]) { | ||
| 158 | continue; | ||
| 159 | } | ||
| 160 | use_tracker.num_used = std::max(use_tracker.num_used, var + 1); | ||
| 161 | use_tracker.var_use[var] = true; | ||
| 162 | Id ret{}; | ||
| 163 | ret.is_valid.Assign(1); | ||
| 164 | ret.type.Assign(type); | ||
| 165 | ret.index.Assign(static_cast<u32>(var)); | ||
| 166 | return ret; | ||
| 167 | } | ||
| 168 | // Allocate a new variable | ||
| 169 | use_tracker.var_use.push_back(true); | ||
| 170 | Id ret{}; | ||
| 171 | ret.is_valid.Assign(1); | ||
| 172 | ret.type.Assign(type); | ||
| 173 | ret.index.Assign(static_cast<u32>(use_tracker.num_used)); | ||
| 174 | ++use_tracker.num_used; | ||
| 175 | return ret; | ||
| 176 | } | ||
| 177 | |||
| 178 | void VarAlloc::Free(Id id) { | ||
| 179 | if (id.is_valid == 0) { | ||
| 180 | throw LogicError("Freeing invalid variable"); | ||
| 181 | } | ||
| 182 | auto& use_tracker{GetUseTracker(id.type)}; | ||
| 183 | use_tracker.var_use[id.index] = false; | ||
| 184 | } | ||
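// Allocation is first-fit: Alloc scans var_use for the lowest free slot and
// grows the pool only when every slot is live. A sketch of the reuse this
// gives, for three F32 allocations with a Free in between:
//   Alloc(F32) -> f_0, Alloc(F32) -> f_1, Free(f_0), Alloc(F32) -> f_0
// num_used only grows, presumably so the emitter can declare the whole
// f_0..f_{num_used-1} range up front.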
| 185 | |||
| 186 | GlslVarType VarAlloc::RegType(IR::Type type) const { | ||
| 187 | switch (type) { | ||
| 188 | case IR::Type::U1: | ||
| 189 | return GlslVarType::U1; | ||
| 190 | case IR::Type::U32: | ||
| 191 | return GlslVarType::U32; | ||
| 192 | case IR::Type::F32: | ||
| 193 | return GlslVarType::F32; | ||
| 194 | case IR::Type::U64: | ||
| 195 | return GlslVarType::U64; | ||
| 196 | case IR::Type::F64: | ||
| 197 | return GlslVarType::F64; | ||
| 198 | default: | ||
| 199 | throw NotImplementedException("IR type {}", type); | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | std::string VarAlloc::GetGlslType(GlslVarType type) const { | ||
| 204 | switch (type) { | ||
| 205 | case GlslVarType::U1: | ||
| 206 | return "bool"; | ||
| 207 | case GlslVarType::F16x2: | ||
| 208 | return "f16vec2"; | ||
| 209 | case GlslVarType::U32: | ||
| 210 | return "uint"; | ||
| 211 | case GlslVarType::F32: | ||
| 212 | case GlslVarType::PrecF32: | ||
| 213 | return "float"; | ||
| 214 | case GlslVarType::U64: | ||
| 215 | return "uint64_t"; | ||
| 216 | case GlslVarType::F64: | ||
| 217 | case GlslVarType::PrecF64: | ||
| 218 | return "double"; | ||
| 219 | case GlslVarType::U32x2: | ||
| 220 | return "uvec2"; | ||
| 221 | case GlslVarType::F32x2: | ||
| 222 | return "vec2"; | ||
| 223 | case GlslVarType::U32x3: | ||
| 224 | return "uvec3"; | ||
| 225 | case GlslVarType::F32x3: | ||
| 226 | return "vec3"; | ||
| 227 | case GlslVarType::U32x4: | ||
| 228 | return "uvec4"; | ||
| 229 | case GlslVarType::F32x4: | ||
| 230 | return "vec4"; | ||
| 231 | case GlslVarType::Void: | ||
| 232 | return ""; | ||
| 233 | default: | ||
| 234 | throw NotImplementedException("Type {}", type); | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) { | ||
| 239 | switch (type) { | ||
| 240 | case GlslVarType::U1: | ||
| 241 | return var_bool; | ||
| 242 | case GlslVarType::F16x2: | ||
| 243 | return var_f16x2; | ||
| 244 | case GlslVarType::U32: | ||
| 245 | return var_u32; | ||
| 246 | case GlslVarType::F32: | ||
| 247 | return var_f32; | ||
| 248 | case GlslVarType::U64: | ||
| 249 | return var_u64; | ||
| 250 | case GlslVarType::F64: | ||
| 251 | return var_f64; | ||
| 252 | case GlslVarType::U32x2: | ||
| 253 | return var_u32x2; | ||
| 254 | case GlslVarType::F32x2: | ||
| 255 | return var_f32x2; | ||
| 256 | case GlslVarType::U32x3: | ||
| 257 | return var_u32x3; | ||
| 258 | case GlslVarType::F32x3: | ||
| 259 | return var_f32x3; | ||
| 260 | case GlslVarType::U32x4: | ||
| 261 | return var_u32x4; | ||
| 262 | case GlslVarType::F32x4: | ||
| 263 | return var_f32x4; | ||
| 264 | case GlslVarType::PrecF32: | ||
| 265 | return var_precf32; | ||
| 266 | case GlslVarType::PrecF64: | ||
| 267 | return var_precf64; | ||
| 268 | default: | ||
| 269 | throw NotImplementedException("Type {}", type); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 | const VarAlloc::UseTracker& VarAlloc::GetUseTracker(GlslVarType type) const { | ||
| 274 | switch (type) { | ||
| 275 | case GlslVarType::U1: | ||
| 276 | return var_bool; | ||
| 277 | case GlslVarType::F16x2: | ||
| 278 | return var_f16x2; | ||
| 279 | case GlslVarType::U32: | ||
| 280 | return var_u32; | ||
| 281 | case GlslVarType::F32: | ||
| 282 | return var_f32; | ||
| 283 | case GlslVarType::U64: | ||
| 284 | return var_u64; | ||
| 285 | case GlslVarType::F64: | ||
| 286 | return var_f64; | ||
| 287 | case GlslVarType::U32x2: | ||
| 288 | return var_u32x2; | ||
| 289 | case GlslVarType::F32x2: | ||
| 290 | return var_f32x2; | ||
| 291 | case GlslVarType::U32x3: | ||
| 292 | return var_u32x3; | ||
| 293 | case GlslVarType::F32x3: | ||
| 294 | return var_f32x3; | ||
| 295 | case GlslVarType::U32x4: | ||
| 296 | return var_u32x4; | ||
| 297 | case GlslVarType::F32x4: | ||
| 298 | return var_f32x4; | ||
| 299 | case GlslVarType::PrecF32: | ||
| 300 | return var_precf32; | ||
| 301 | case GlslVarType::PrecF64: | ||
| 302 | return var_precf64; | ||
| 303 | default: | ||
| 304 | throw NotImplementedException("Type {}", type); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | } // namespace Shader::Backend::GLSL | ||
diff --git a/src/shader_recompiler/backend/glsl/var_alloc.h b/src/shader_recompiler/backend/glsl/var_alloc.h new file mode 100644 index 000000000..8b49f32a6 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/var_alloc.h | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <string> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | class Inst; | ||
| 16 | class Value; | ||
| 17 | enum class Type; | ||
| 18 | } // namespace Shader::IR | ||
| 19 | |||
| 20 | namespace Shader::Backend::GLSL { | ||
| 21 | enum class GlslVarType : u32 { | ||
| 22 | U1, | ||
| 23 | F16x2, | ||
| 24 | U32, | ||
| 25 | F32, | ||
| 26 | U64, | ||
| 27 | F64, | ||
| 28 | U32x2, | ||
| 29 | F32x2, | ||
| 30 | U32x3, | ||
| 31 | F32x3, | ||
| 32 | U32x4, | ||
| 33 | F32x4, | ||
| 34 | PrecF32, | ||
| 35 | PrecF64, | ||
| 36 | Void, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct Id { | ||
| 40 | union { | ||
| 41 | u32 raw; | ||
| 42 | BitField<0, 1, u32> is_valid; | ||
| 43 | BitField<1, 4, GlslVarType> type; | ||
| 44 | BitField<6, 26, u32> index; | ||
| 45 | }; | ||
| 46 | |||
| 47 | bool operator==(Id rhs) const noexcept { | ||
| 48 | return raw == rhs.raw; | ||
| 49 | } | ||
| 50 | bool operator!=(Id rhs) const noexcept { | ||
| 51 | return !operator==(rhs); | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | static_assert(sizeof(Id) == sizeof(u32)); | ||
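// A worked packing example (assuming the enumerator values above, so
// static_cast<u32>(GlslVarType::U32) == 2): a valid U32 id with index 5 is
// stored as raw = 1 | (2 << 1) | (5 << 6) = 0x145. Bit 5 is unused padding
// between the type and index fields, and VarAlloc::Representation renders
// this id as "u_5".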
| 55 | |||
| 56 | class VarAlloc { | ||
| 57 | public: | ||
| 58 | struct UseTracker { | ||
| 59 | bool uses_temp{}; | ||
| 60 | size_t num_used{}; | ||
| 61 | std::vector<bool> var_use; | ||
| 62 | }; | ||
| 63 | |||
| 64 | /// Used for explicit usages of variables; falls back to a 't'-prefixed temporary when the result has no uses | ||
| 65 | std::string Define(IR::Inst& inst, GlslVarType type); | ||
| 66 | std::string Define(IR::Inst& inst, IR::Type type); | ||
| 67 | |||
| 68 | /// Used to assign variables used by the IR. May return a blank string if | ||
| 69 | /// the instruction's result is unused in the IR. | ||
| 70 | std::string AddDefine(IR::Inst& inst, GlslVarType type); | ||
| 71 | std::string PhiDefine(IR::Inst& inst, IR::Type type); | ||
| 72 | |||
| 73 | std::string Consume(const IR::Value& value); | ||
| 74 | std::string ConsumeInst(IR::Inst& inst); | ||
| 75 | |||
| 76 | std::string GetGlslType(GlslVarType type) const; | ||
| 77 | std::string GetGlslType(IR::Type type) const; | ||
| 78 | |||
| 79 | const UseTracker& GetUseTracker(GlslVarType type) const; | ||
| 80 | std::string Representation(u32 index, GlslVarType type) const; | ||
| 81 | |||
| 82 | private: | ||
| 83 | GlslVarType RegType(IR::Type type) const; | ||
| 84 | Id Alloc(GlslVarType type); | ||
| 85 | void Free(Id id); | ||
| 86 | UseTracker& GetUseTracker(GlslVarType type); | ||
| 87 | std::string Representation(Id id) const; | ||
| 88 | |||
| 89 | UseTracker var_bool{}; | ||
| 90 | UseTracker var_f16x2{}; | ||
| 91 | UseTracker var_u32{}; | ||
| 92 | UseTracker var_u32x2{}; | ||
| 93 | UseTracker var_u32x3{}; | ||
| 94 | UseTracker var_u32x4{}; | ||
| 95 | UseTracker var_f32{}; | ||
| 96 | UseTracker var_f32x2{}; | ||
| 97 | UseTracker var_f32x3{}; | ||
| 98 | UseTracker var_f32x4{}; | ||
| 99 | UseTracker var_u64{}; | ||
| 100 | UseTracker var_f64{}; | ||
| 101 | UseTracker var_precf32{}; | ||
| 102 | UseTracker var_precf64{}; | ||
| 103 | }; | ||
| 104 | |||
| 105 | } // namespace Shader::Backend::GLSL | ||
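A minimal sketch of the intended call pattern, assuming a hypothetical IR
add instruction add_inst and output string code (the real backend routes
these calls through its EmitContext helpers):

    VarAlloc vars;
    const std::string dest{vars.Define(add_inst, IR::Type::U32)};  // e.g. "u_0"
    const std::string lhs{vars.Consume(add_inst.Arg(0))};          // variable or immediate
    const std::string rhs{vars.Consume(add_inst.Arg(1))};
    code += fmt::format("{}={}+{};", dest, lhs, rhs);
    // Consume() frees an operand's slot once its last IR use is gone,
    // so a later Define() can hand the same variable out again.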
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp new file mode 100644 index 000000000..2d29d8c14 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -0,0 +1,1368 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <climits> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "common/div_ceil.h" | ||
| 14 | #include "shader_recompiler/backend/spirv/emit_context.h" | ||
| 15 | |||
| 16 | namespace Shader::Backend::SPIRV { | ||
| 17 | namespace { | ||
| 18 | enum class Operation { | ||
| 19 | Increment, | ||
| 20 | Decrement, | ||
| 21 | FPAdd, | ||
| 22 | FPMin, | ||
| 23 | FPMax, | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct AttrInfo { | ||
| 27 | Id pointer; | ||
| 28 | Id id; | ||
| 29 | bool needs_cast; | ||
| 30 | }; | ||
| 31 | |||
| 32 | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | ||
| 33 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||
| 34 | const Id type{ctx.F32[1]}; | ||
| 35 | const bool depth{desc.is_depth}; | ||
| 36 | switch (desc.type) { | ||
| 37 | case TextureType::Color1D: | ||
| 38 | return ctx.TypeImage(type, spv::Dim::Dim1D, depth, false, false, 1, format); | ||
| 39 | case TextureType::ColorArray1D: | ||
| 40 | return ctx.TypeImage(type, spv::Dim::Dim1D, depth, true, false, 1, format); | ||
| 41 | case TextureType::Color2D: | ||
| 42 | return ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format); | ||
| 43 | case TextureType::ColorArray2D: | ||
| 44 | return ctx.TypeImage(type, spv::Dim::Dim2D, depth, true, false, 1, format); | ||
| 45 | case TextureType::Color3D: | ||
| 46 | return ctx.TypeImage(type, spv::Dim::Dim3D, depth, false, false, 1, format); | ||
| 47 | case TextureType::ColorCube: | ||
| 48 | return ctx.TypeImage(type, spv::Dim::Cube, depth, false, false, 1, format); | ||
| 49 | case TextureType::ColorArrayCube: | ||
| 50 | return ctx.TypeImage(type, spv::Dim::Cube, depth, true, false, 1, format); | ||
| 51 | case TextureType::Buffer: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw InvalidArgument("Invalid texture type {}", desc.type); | ||
| 55 | } | ||
| 56 | |||
| 57 | spv::ImageFormat GetImageFormat(ImageFormat format) { | ||
| 58 | switch (format) { | ||
| 59 | case ImageFormat::Typeless: | ||
| 60 | return spv::ImageFormat::Unknown; | ||
| 61 | case ImageFormat::R8_UINT: | ||
| 62 | return spv::ImageFormat::R8ui; | ||
| 63 | case ImageFormat::R8_SINT: | ||
| 64 | return spv::ImageFormat::R8i; | ||
| 65 | case ImageFormat::R16_UINT: | ||
| 66 | return spv::ImageFormat::R16ui; | ||
| 67 | case ImageFormat::R16_SINT: | ||
| 68 | return spv::ImageFormat::R16i; | ||
| 69 | case ImageFormat::R32_UINT: | ||
| 70 | return spv::ImageFormat::R32ui; | ||
| 71 | case ImageFormat::R32G32_UINT: | ||
| 72 | return spv::ImageFormat::Rg32ui; | ||
| 73 | case ImageFormat::R32G32B32A32_UINT: | ||
| 74 | return spv::ImageFormat::Rgba32ui; | ||
| 75 | } | ||
| 76 | throw InvalidArgument("Invalid image format {}", format); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { | ||
| 80 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | ||
| 81 | const Id type{ctx.U32[1]}; | ||
| 82 | switch (desc.type) { | ||
| 83 | case TextureType::Color1D: | ||
| 84 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format); | ||
| 85 | case TextureType::ColorArray1D: | ||
| 86 | return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 2, format); | ||
| 87 | case TextureType::Color2D: | ||
| 88 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 2, format); | ||
| 89 | case TextureType::ColorArray2D: | ||
| 90 | return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 2, format); | ||
| 91 | case TextureType::Color3D: | ||
| 92 | return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 2, format); | ||
| 93 | case TextureType::Buffer: | ||
| 94 | throw NotImplementedException("Image buffer"); | ||
| 95 | default: | ||
| 96 | break; | ||
| 97 | } | ||
| 98 | throw InvalidArgument("Invalid texture type {}", desc.type); | ||
| 99 | } | ||
| 100 | |||
| 101 | Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin, | ||
| 102 | spv::StorageClass storage_class) { | ||
| 103 | const Id pointer_type{ctx.TypePointer(storage_class, type)}; | ||
| 104 | const Id id{ctx.AddGlobalVariable(pointer_type, storage_class)}; | ||
| 105 | if (builtin) { | ||
| 106 | ctx.Decorate(id, spv::Decoration::BuiltIn, *builtin); | ||
| 107 | } | ||
| 108 | ctx.interfaces.push_back(id); | ||
| 109 | return id; | ||
| 110 | } | ||
| 111 | |||
| 112 | u32 NumVertices(InputTopology input_topology) { | ||
| 113 | switch (input_topology) { | ||
| 114 | case InputTopology::Points: | ||
| 115 | return 1; | ||
| 116 | case InputTopology::Lines: | ||
| 117 | return 2; | ||
| 118 | case InputTopology::LinesAdjacency: | ||
| 119 | return 4; | ||
| 120 | case InputTopology::Triangles: | ||
| 121 | return 3; | ||
| 122 | case InputTopology::TrianglesAdjacency: | ||
| 123 | return 6; | ||
| 124 | } | ||
| 125 | throw InvalidArgument("Invalid input topology {}", input_topology); | ||
| 126 | } | ||
| 127 | |||
| 128 | Id DefineInput(EmitContext& ctx, Id type, bool per_invocation, | ||
| 129 | std::optional<spv::BuiltIn> builtin = std::nullopt) { | ||
| 130 | switch (ctx.stage) { | ||
| 131 | case Stage::TessellationControl: | ||
| 132 | case Stage::TessellationEval: | ||
| 133 | if (per_invocation) { | ||
| 134 | type = ctx.TypeArray(type, ctx.Const(32u)); | ||
| 135 | } | ||
| 136 | break; | ||
| 137 | case Stage::Geometry: | ||
| 138 | if (per_invocation) { | ||
| 139 | const u32 num_vertices{NumVertices(ctx.runtime_info.input_topology)}; | ||
| 140 | type = ctx.TypeArray(type, ctx.Const(num_vertices)); | ||
| 141 | } | ||
| 142 | break; | ||
| 143 | default: | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id DefineOutput(EmitContext& ctx, Id type, std::optional<u32> invocations, | ||
| 150 | std::optional<spv::BuiltIn> builtin = std::nullopt) { | ||
| 151 | if (invocations && ctx.stage == Stage::TessellationControl) { | ||
| 152 | type = ctx.TypeArray(type, ctx.Const(*invocations)); | ||
| 153 | } | ||
| 154 | return DefineVariable(ctx, type, builtin, spv::StorageClass::Output); | ||
| 155 | } | ||
| 156 | |||
| 157 | void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invocations) { | ||
| 158 | static constexpr std::string_view swizzle{"xyzw"}; | ||
| 159 | const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 160 | u32 element{0}; | ||
| 161 | while (element < 4) { | ||
| 162 | const u32 remainder{4 - element}; | ||
| 163 | const TransformFeedbackVarying* xfb_varying{}; | ||
| 164 | if (!ctx.runtime_info.xfb_varyings.empty()) { | ||
| 165 | xfb_varying = &ctx.runtime_info.xfb_varyings[base_attr_index + element]; | ||
| 166 | xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; | ||
| 167 | } | ||
| 168 | const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; | ||
| 169 | |||
| 170 | const Id id{DefineOutput(ctx, ctx.F32[num_components], invocations)}; | ||
| 171 | ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 172 | if (element > 0) { | ||
| 173 | ctx.Decorate(id, spv::Decoration::Component, element); | ||
| 174 | } | ||
| 175 | if (xfb_varying) { | ||
| 176 | ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer); | ||
| 177 | ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride); | ||
| 178 | ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset); | ||
| 179 | } | ||
| 180 | if (num_components < 4 || element > 0) { | ||
| 181 | const std::string_view subswizzle{swizzle.substr(element, num_components)}; | ||
| 182 | ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle)); | ||
| 183 | } else { | ||
| 184 | ctx.Name(id, fmt::format("out_attr{}", index)); | ||
| 185 | } | ||
| 186 | const GenericElementInfo info{ | ||
| 187 | .id = id, | ||
| 188 | .first_element = element, | ||
| 189 | .num_components = num_components, | ||
| 190 | }; | ||
| 191 | std::fill_n(ctx.output_generics[index].begin() + element, num_components, info); | ||
| 192 | element += num_components; | ||
| 193 | } | ||
| 194 | } | ||
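// A sketch of the splitting above: if the transform feedback layout asks for
// two components at element 0 and two at element 2 of generic 1, the vec4
// output becomes two vec2 variables sharing one location:
//   out_attr1_xy: Location 1 (plus XfbBuffer/XfbStride/Offset)
//   out_attr1_zw: Location 1, Component 2
// Without xfb varyings the attribute stays a single "out_attr1" vec4.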
| 195 | |||
| 196 | Id GetAttributeType(EmitContext& ctx, AttributeType type) { | ||
| 197 | switch (type) { | ||
| 198 | case AttributeType::Float: | ||
| 199 | return ctx.F32[4]; | ||
| 200 | case AttributeType::SignedInt: | ||
| 201 | return ctx.TypeVector(ctx.TypeInt(32, true), 4); | ||
| 202 | case AttributeType::UnsignedInt: | ||
| 203 | return ctx.U32[4]; | ||
| 204 | case AttributeType::Disabled: | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 208 | } | ||
| 209 | |||
| 210 | std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { | ||
| 211 | const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; | ||
| 212 | switch (type) { | ||
| 213 | case AttributeType::Float: | ||
| 214 | return AttrInfo{ctx.input_f32, ctx.F32[1], false}; | ||
| 215 | case AttributeType::UnsignedInt: | ||
| 216 | return AttrInfo{ctx.input_u32, ctx.U32[1], true}; | ||
| 217 | case AttributeType::SignedInt: | ||
| 218 | return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; | ||
| 219 | case AttributeType::Disabled: | ||
| 220 | return std::nullopt; | ||
| 221 | } | ||
| 222 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 223 | } | ||
| 224 | |||
| 225 | std::string_view StageName(Stage stage) { | ||
| 226 | switch (stage) { | ||
| 227 | case Stage::VertexA: | ||
| 228 | return "vs_a"; | ||
| 229 | case Stage::VertexB: | ||
| 230 | return "vs"; | ||
| 231 | case Stage::TessellationControl: | ||
| 232 | return "tcs"; | ||
| 233 | case Stage::TessellationEval: | ||
| 234 | return "tes"; | ||
| 235 | case Stage::Geometry: | ||
| 236 | return "gs"; | ||
| 237 | case Stage::Fragment: | ||
| 238 | return "fs"; | ||
| 239 | case Stage::Compute: | ||
| 240 | return "cs"; | ||
| 241 | } | ||
| 242 | throw InvalidArgument("Invalid stage {}", stage); | ||
| 243 | } | ||
| 244 | |||
| 245 | template <typename... Args> | ||
| 246 | void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... args) { | ||
| 247 | ctx.Name(object, fmt::format(fmt::runtime(format_str), StageName(ctx.stage), | ||
| 248 | std::forward<Args>(args)...) | ||
| 249 | .c_str()); | ||
| 250 | } | ||
| 251 | |||
| 252 | void DefineConstBuffers(EmitContext& ctx, const Info& info, Id UniformDefinitions::*member_type, | ||
| 253 | u32 binding, Id type, char type_char, u32 element_size) { | ||
| 254 | const Id array_type{ctx.TypeArray(type, ctx.Const(65536U / element_size))}; | ||
| 255 | ctx.Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 256 | |||
| 257 | const Id struct_type{ctx.TypeStruct(array_type)}; | ||
| 258 | Name(ctx, struct_type, "{}_cbuf_block_{}{}", type_char, element_size * CHAR_BIT); | ||
| 259 | ctx.Decorate(struct_type, spv::Decoration::Block); | ||
| 260 | ctx.MemberName(struct_type, 0, "data"); | ||
| 261 | ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | ||
| 262 | |||
| 263 | const Id struct_pointer_type{ctx.TypePointer(spv::StorageClass::Uniform, struct_type)}; | ||
| 264 | const Id uniform_type{ctx.TypePointer(spv::StorageClass::Uniform, type)}; | ||
| 265 | ctx.uniform_types.*member_type = uniform_type; | ||
| 266 | |||
| 267 | for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | ||
| 268 | const Id id{ctx.AddGlobalVariable(struct_pointer_type, spv::StorageClass::Uniform)}; | ||
| 269 | ctx.Decorate(id, spv::Decoration::Binding, binding); | ||
| 270 | ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 271 | ctx.Name(id, fmt::format("c{}", desc.index)); | ||
| 272 | for (size_t i = 0; i < desc.count; ++i) { | ||
| 273 | ctx.cbufs[desc.index + i].*member_type = id; | ||
| 274 | } | ||
| 275 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 276 | ctx.interfaces.push_back(id); | ||
| 277 | } | ||
| 278 | binding += desc.count; | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | void DefineSsbos(EmitContext& ctx, StorageTypeDefinition& type_def, | ||
| 283 | Id StorageDefinitions::*member_type, const Info& info, u32 binding, Id type, | ||
| 284 | u32 stride) { | ||
| 285 | const Id array_type{ctx.TypeRuntimeArray(type)}; | ||
| 286 | ctx.Decorate(array_type, spv::Decoration::ArrayStride, stride); | ||
| 287 | |||
| 288 | const Id struct_type{ctx.TypeStruct(array_type)}; | ||
| 289 | ctx.Decorate(struct_type, spv::Decoration::Block); | ||
| 290 | ctx.MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | ||
| 291 | |||
| 292 | const Id struct_pointer{ctx.TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; | ||
| 293 | type_def.array = struct_pointer; | ||
| 294 | type_def.element = ctx.TypePointer(spv::StorageClass::StorageBuffer, type); | ||
| 295 | |||
| 296 | u32 index{}; | ||
| 297 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||
| 298 | const Id id{ctx.AddGlobalVariable(struct_pointer, spv::StorageClass::StorageBuffer)}; | ||
| 299 | ctx.Decorate(id, spv::Decoration::Binding, binding); | ||
| 300 | ctx.Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 301 | ctx.Name(id, fmt::format("ssbo{}", index)); | ||
| 302 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 303 | ctx.interfaces.push_back(id); | ||
| 304 | } | ||
| 305 | for (size_t i = 0; i < desc.count; ++i) { | ||
| 306 | ctx.ssbos[index + i].*member_type = id; | ||
| 307 | } | ||
| 308 | index += desc.count; | ||
| 309 | binding += desc.count; | ||
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 | Id CasFunction(EmitContext& ctx, Operation operation, Id value_type) { | ||
| 314 | const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; | ||
| 315 | const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 316 | const Id op_a{ctx.OpFunctionParameter(value_type)}; | ||
| 317 | const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||
| 318 | ctx.AddLabel(); | ||
| 319 | Id result{}; | ||
| 320 | switch (operation) { | ||
| 321 | case Operation::Increment: { | ||
| 322 | const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; | ||
| 323 | const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
| 324 | result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); | ||
| 325 | break; | ||
| 326 | } | ||
| 327 | case Operation::Decrement: { | ||
| 328 | const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; | ||
| 329 | const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; | ||
| 330 | const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; | ||
| 331 | const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
| 332 | result = ctx.OpSelect(value_type, pred, op_b, decr); | ||
| 333 | break; | ||
| 334 | } | ||
| 335 | case Operation::FPAdd: | ||
| 336 | result = ctx.OpFAdd(value_type, op_a, op_b); | ||
| 337 | break; | ||
| 338 | case Operation::FPMin: | ||
| 339 | result = ctx.OpFMin(value_type, op_a, op_b); | ||
| 340 | break; | ||
| 341 | case Operation::FPMax: | ||
| 342 | result = ctx.OpFMax(value_type, op_a, op_b); | ||
| 343 | break; | ||
| 344 | default: | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | ctx.OpReturnValue(result); | ||
| 348 | ctx.OpFunctionEnd(); | ||
| 349 | return func; | ||
| 350 | } | ||
| 351 | |||
| 352 | Id CasLoop(EmitContext& ctx, Operation operation, Id array_pointer, Id element_pointer, | ||
| 353 | Id value_type, Id memory_type, spv::Scope scope) { | ||
| 354 | const bool is_shared{scope == spv::Scope::Workgroup}; | ||
| 355 | const bool is_struct{!is_shared || ctx.profile.support_explicit_workgroup_layout}; | ||
| 356 | const Id cas_func{CasFunction(ctx, operation, value_type)}; | ||
| 357 | const Id zero{ctx.u32_zero_value}; | ||
| 358 | const Id scope_id{ctx.Const(static_cast<u32>(scope))}; | ||
| 359 | |||
| 360 | const Id loop_header{ctx.OpLabel()}; | ||
| 361 | const Id continue_block{ctx.OpLabel()}; | ||
| 362 | const Id merge_block{ctx.OpLabel()}; | ||
| 363 | const Id func_type{is_shared | ||
| 364 | ? ctx.TypeFunction(value_type, ctx.U32[1], value_type) | ||
| 365 | : ctx.TypeFunction(value_type, ctx.U32[1], value_type, array_pointer)}; | ||
| 366 | |||
| 367 | const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 368 | const Id index{ctx.OpFunctionParameter(ctx.U32[1])}; | ||
| 369 | const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||
| 370 | const Id base{is_shared ? ctx.shared_memory_u32 : ctx.OpFunctionParameter(array_pointer)}; | ||
| 371 | ctx.AddLabel(); | ||
| 372 | ctx.OpBranch(loop_header); | ||
| 373 | ctx.AddLabel(loop_header); | ||
| 374 | |||
| 375 | ctx.OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 376 | ctx.OpBranch(continue_block); | ||
| 377 | |||
| 378 | ctx.AddLabel(continue_block); | ||
| 379 | const Id word_pointer{is_struct ? ctx.OpAccessChain(element_pointer, base, zero, index) | ||
| 380 | : ctx.OpAccessChain(element_pointer, base, index)}; | ||
| 381 | if (value_type.value == ctx.F32[2].value) { | ||
| 382 | const Id u32_value{ctx.OpLoad(ctx.U32[1], word_pointer)}; | ||
| 383 | const Id value{ctx.OpUnpackHalf2x16(ctx.F32[2], u32_value)}; | ||
| 384 | const Id new_value{ctx.OpFunctionCall(value_type, cas_func, value, op_b)}; | ||
| 385 | const Id u32_new_value{ctx.OpPackHalf2x16(ctx.U32[1], new_value)}; | ||
| 386 | const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, | ||
| 387 | zero, u32_new_value, u32_value)}; | ||
| 388 | const Id success{ctx.OpIEqual(ctx.U1, atomic_res, u32_value)}; | ||
| 389 | ctx.OpBranchConditional(success, merge_block, loop_header); | ||
| 390 | |||
| 391 | ctx.AddLabel(merge_block); | ||
| 392 | ctx.OpReturnValue(ctx.OpUnpackHalf2x16(ctx.F32[2], atomic_res)); | ||
| 393 | } else { | ||
| 394 | const Id value{ctx.OpLoad(memory_type, word_pointer)}; | ||
| 395 | const bool matching_type{value_type.value == memory_type.value}; | ||
| 396 | const Id bitcast_value{matching_type ? value : ctx.OpBitcast(value_type, value)}; | ||
| 397 | const Id cal_res{ctx.OpFunctionCall(value_type, cas_func, bitcast_value, op_b)}; | ||
| 398 | const Id new_value{matching_type ? cal_res : ctx.OpBitcast(memory_type, cal_res)}; | ||
| 399 | const Id atomic_res{ctx.OpAtomicCompareExchange(ctx.U32[1], word_pointer, scope_id, zero, | ||
| 400 | zero, new_value, value)}; | ||
| 401 | const Id success{ctx.OpIEqual(ctx.U1, atomic_res, value)}; | ||
| 402 | ctx.OpBranchConditional(success, merge_block, loop_header); | ||
| 403 | |||
| 404 | ctx.AddLabel(merge_block); | ||
| 405 | ctx.OpReturnValue(ctx.OpBitcast(value_type, atomic_res)); | ||
| 406 | } | ||
| 407 | ctx.OpFunctionEnd(); | ||
| 408 | return func; | ||
| 409 | } | ||
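// The generated function is morally equivalent to this GLSL-style loop
// (a sketch of the non-F16x2 path; T is value_type, M is memory_type):
//   T CasLoop(uint index, T op_b) {
//       for (;;) {
//           M old_value = memory[index];
//           M new_value = M(cas_func(T(old_value), op_b));
//           if (atomicCompSwap(memory[index], old_value, new_value) == old_value)
//               return T(old_value);
//       }
//   }
// i.e. the 32-bit compare-exchange is retried until no other invocation
// raced the read-modify-write.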
| 410 | |||
| 411 | template <typename Desc> | ||
| 412 | std::string NameOf(Stage stage, const Desc& desc, std::string_view prefix) { | ||
| 413 | if (desc.count > 1) { | ||
| 414 | return fmt::format("{}_{}{}_{:02x}x{}", StageName(stage), prefix, desc.cbuf_index, | ||
| 415 | desc.cbuf_offset, desc.count); | ||
| 416 | } else { | ||
| 417 | return fmt::format("{}_{}{}_{:02x}", StageName(stage), prefix, desc.cbuf_index, | ||
| 418 | desc.cbuf_offset); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { | ||
| 423 | if (count > 1) { | ||
| 424 | const Id array_type{ctx.TypeArray(sampled_type, ctx.Const(count))}; | ||
| 425 | return ctx.TypePointer(spv::StorageClass::UniformConstant, array_type); | ||
| 426 | } else { | ||
| 427 | return pointer_type; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | } // Anonymous namespace | ||
| 431 | |||
| 432 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | ||
| 433 | defs[0] = sirit_ctx.Name(base_type, name); | ||
| 434 | |||
| 435 | std::array<char, 6> def_name; | ||
| 436 | for (int i = 1; i < 4; ++i) { | ||
| 437 | const std::string_view def_name_view( | ||
| 438 | def_name.data(), | ||
| 439 | fmt::format_to_n(def_name.data(), def_name.size(), "{}x{}", name, i + 1).size); | ||
| 440 | defs[static_cast<size_t>(i)] = | ||
| 441 | sirit_ctx.Name(sirit_ctx.TypeVector(base_type, i + 1), def_name_view); | ||
| 442 | } | ||
| 443 | } | ||
| 444 | |||
| 445 | EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_, | ||
| 446 | IR::Program& program, Bindings& bindings) | ||
| 447 | : Sirit::Module(profile_.supported_spirv), profile{profile_}, | ||
| 448 | runtime_info{runtime_info_}, stage{program.stage} { | ||
| 449 | const bool is_unified{profile.unified_descriptor_binding}; | ||
| 450 | u32& uniform_binding{is_unified ? bindings.unified : bindings.uniform_buffer}; | ||
| 451 | u32& storage_binding{is_unified ? bindings.unified : bindings.storage_buffer}; | ||
| 452 | u32& texture_binding{is_unified ? bindings.unified : bindings.texture}; | ||
| 453 | u32& image_binding{is_unified ? bindings.unified : bindings.image}; | ||
| 454 | AddCapability(spv::Capability::Shader); | ||
| 455 | DefineCommonTypes(program.info); | ||
| 456 | DefineCommonConstants(); | ||
| 457 | DefineInterfaces(program); | ||
| 458 | DefineLocalMemory(program); | ||
| 459 | DefineSharedMemory(program); | ||
| 460 | DefineSharedMemoryFunctions(program); | ||
| 461 | DefineConstantBuffers(program.info, uniform_binding); | ||
| 462 | DefineStorageBuffers(program.info, storage_binding); | ||
| 463 | DefineTextureBuffers(program.info, texture_binding); | ||
| 464 | DefineImageBuffers(program.info, image_binding); | ||
| 465 | DefineTextures(program.info, texture_binding); | ||
| 466 | DefineImages(program.info, image_binding); | ||
| 467 | DefineAttributeMemAccess(program.info); | ||
| 468 | DefineGlobalMemoryFunctions(program.info); | ||
| 469 | } | ||
| 470 | |||
| 471 | EmitContext::~EmitContext() = default; | ||
| 472 | |||
| 473 | Id EmitContext::Def(const IR::Value& value) { | ||
| 474 | if (!value.IsImmediate()) { | ||
| 475 | return value.InstRecursive()->Definition<Id>(); | ||
| 476 | } | ||
| 477 | switch (value.Type()) { | ||
| 478 | case IR::Type::Void: | ||
| 479 | // Void instructions are used for optional arguments (e.g. texture offsets) | ||
| 480 | // They are not meant to be used in the SPIR-V module | ||
| 481 | return Id{}; | ||
| 482 | case IR::Type::U1: | ||
| 483 | return value.U1() ? true_value : false_value; | ||
| 484 | case IR::Type::U32: | ||
| 485 | return Const(value.U32()); | ||
| 486 | case IR::Type::U64: | ||
| 487 | return Constant(U64, value.U64()); | ||
| 488 | case IR::Type::F32: | ||
| 489 | return Const(value.F32()); | ||
| 490 | case IR::Type::F64: | ||
| 491 | return Constant(F64[1], value.F64()); | ||
| 492 | default: | ||
| 493 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | Id EmitContext::BitOffset8(const IR::Value& offset) { | ||
| 498 | if (offset.IsImmediate()) { | ||
| 499 | return Const((offset.U32() % 4) * 8); | ||
| 500 | } | ||
| 501 | return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(24u)); | ||
| 502 | } | ||
| 503 | |||
| 504 | Id EmitContext::BitOffset16(const IR::Value& offset) { | ||
| 505 | if (offset.IsImmediate()) { | ||
| 506 | return Const(((offset.U32() / 2) % 2) * 16); | ||
| 507 | } | ||
| 508 | return OpBitwiseAnd(U32[1], OpShiftLeftLogical(U32[1], Def(offset), Const(3u)), Const(16u)); | ||
| 509 | } | ||
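// Worked examples: for an immediate byte offset of 6, BitOffset8 yields
// (6 % 4) * 8 = 16 and BitOffset16 yields ((6 / 2) % 2) * 16 = 16, the bit
// position of that byte/halfword within its containing 32-bit word. The
// dynamic paths compute the same values as (offset << 3) & 24 and
// (offset << 3) & 16 respectively.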
| 510 | |||
| 511 | void EmitContext::DefineCommonTypes(const Info& info) { | ||
| 512 | void_id = TypeVoid(); | ||
| 513 | |||
| 514 | U1 = Name(TypeBool(), "u1"); | ||
| 515 | |||
| 516 | F32.Define(*this, TypeFloat(32), "f32"); | ||
| 517 | U32.Define(*this, TypeInt(32, false), "u32"); | ||
| 518 | S32.Define(*this, TypeInt(32, true), "s32"); | ||
| 519 | |||
| 520 | private_u32 = Name(TypePointer(spv::StorageClass::Private, U32[1]), "private_u32"); | ||
| 521 | |||
| 522 | input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32"); | ||
| 523 | input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32"); | ||
| 524 | input_s32 = Name(TypePointer(spv::StorageClass::Input, TypeInt(32, true)), "input_s32"); | ||
| 525 | |||
| 526 | output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32"); | ||
| 527 | output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32"); | ||
| 528 | |||
| 529 | if (info.uses_int8 && profile.support_int8) { | ||
| 530 | AddCapability(spv::Capability::Int8); | ||
| 531 | U8 = Name(TypeInt(8, false), "u8"); | ||
| 532 | S8 = Name(TypeInt(8, true), "s8"); | ||
| 533 | } | ||
| 534 | if (info.uses_int16 && profile.support_int16) { | ||
| 535 | AddCapability(spv::Capability::Int16); | ||
| 536 | U16 = Name(TypeInt(16, false), "u16"); | ||
| 537 | S16 = Name(TypeInt(16, true), "s16"); | ||
| 538 | } | ||
| 539 | if (info.uses_int64) { | ||
| 540 | AddCapability(spv::Capability::Int64); | ||
| 541 | U64 = Name(TypeInt(64, false), "u64"); | ||
| 542 | } | ||
| 543 | if (info.uses_fp16) { | ||
| 544 | AddCapability(spv::Capability::Float16); | ||
| 545 | F16.Define(*this, TypeFloat(16), "f16"); | ||
| 546 | } | ||
| 547 | if (info.uses_fp64) { | ||
| 548 | AddCapability(spv::Capability::Float64); | ||
| 549 | F64.Define(*this, TypeFloat(64), "f64"); | ||
| 550 | } | ||
| 551 | } | ||
| 552 | |||
| 553 | void EmitContext::DefineCommonConstants() { | ||
| 554 | true_value = ConstantTrue(U1); | ||
| 555 | false_value = ConstantFalse(U1); | ||
| 556 | u32_zero_value = Const(0U); | ||
| 557 | f32_zero_value = Const(0.0f); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitContext::DefineInterfaces(const IR::Program& program) { | ||
| 561 | DefineInputs(program); | ||
| 562 | DefineOutputs(program); | ||
| 563 | } | ||
| 564 | |||
| 565 | void EmitContext::DefineLocalMemory(const IR::Program& program) { | ||
| 566 | if (program.local_memory_size == 0) { | ||
| 567 | return; | ||
| 568 | } | ||
| 569 | const u32 num_elements{Common::DivCeil(program.local_memory_size, 4U)}; | ||
| 570 | const Id type{TypeArray(U32[1], Const(num_elements))}; | ||
| 571 | const Id pointer{TypePointer(spv::StorageClass::Private, type)}; | ||
| 572 | local_memory = AddGlobalVariable(pointer, spv::StorageClass::Private); | ||
| 573 | if (profile.supported_spirv >= 0x00010400) { | ||
| 574 | interfaces.push_back(local_memory); | ||
| 575 | } | ||
| 576 | } | ||
| 577 | |||
| 578 | void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||
| 579 | if (program.shared_memory_size == 0) { | ||
| 580 | return; | ||
| 581 | } | ||
| 582 | const auto make{[&](Id element_type, u32 element_size) { | ||
| 583 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, element_size)}; | ||
| 584 | const Id array_type{TypeArray(element_type, Const(num_elements))}; | ||
| 585 | Decorate(array_type, spv::Decoration::ArrayStride, element_size); | ||
| 586 | |||
| 587 | const Id struct_type{TypeStruct(array_type)}; | ||
| 588 | MemberDecorate(struct_type, 0U, spv::Decoration::Offset, 0U); | ||
| 589 | Decorate(struct_type, spv::Decoration::Block); | ||
| 590 | |||
| 591 | const Id pointer{TypePointer(spv::StorageClass::Workgroup, struct_type)}; | ||
| 592 | const Id element_pointer{TypePointer(spv::StorageClass::Workgroup, element_type)}; | ||
| 593 | const Id variable{AddGlobalVariable(pointer, spv::StorageClass::Workgroup)}; | ||
| 594 | Decorate(variable, spv::Decoration::Aliased); | ||
| 595 | interfaces.push_back(variable); | ||
| 596 | |||
| 597 | return std::make_tuple(variable, element_pointer, pointer); | ||
| 598 | }}; | ||
| 599 | if (profile.support_explicit_workgroup_layout) { | ||
| 600 | AddExtension("SPV_KHR_workgroup_memory_explicit_layout"); | ||
| 601 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayoutKHR); | ||
| 602 | if (program.info.uses_int8) { | ||
| 603 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR); | ||
| 604 | std::tie(shared_memory_u8, shared_u8, std::ignore) = make(U8, 1); | ||
| 605 | } | ||
| 606 | if (program.info.uses_int16) { | ||
| 607 | AddCapability(spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR); | ||
| 608 | std::tie(shared_memory_u16, shared_u16, std::ignore) = make(U16, 2); | ||
| 609 | } | ||
| 610 | if (program.info.uses_int64) { | ||
| 611 | std::tie(shared_memory_u64, shared_u64, std::ignore) = make(U64, 8); | ||
| 612 | } | ||
| 613 | std::tie(shared_memory_u32, shared_u32, shared_memory_u32_type) = make(U32[1], 4); | ||
| 614 | std::tie(shared_memory_u32x2, shared_u32x2, std::ignore) = make(U32[2], 8); | ||
| 615 | std::tie(shared_memory_u32x4, shared_u32x4, std::ignore) = make(U32[4], 16); | ||
| 616 | return; | ||
| 617 | } | ||
| 618 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | ||
| 619 | const Id type{TypeArray(U32[1], Const(num_elements))}; | ||
| 620 | shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); | ||
| 621 | |||
| 622 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | ||
| 623 | shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); | ||
| 624 | interfaces.push_back(shared_memory_u32); | ||
| 625 | |||
| 626 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | ||
| 627 | const auto make_function{[&](u32 mask, u32 size) { | ||
| 628 | const Id loop_header{OpLabel()}; | ||
| 629 | const Id continue_block{OpLabel()}; | ||
| 630 | const Id merge_block{OpLabel()}; | ||
| 631 | |||
| 632 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 633 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 634 | const Id insert_value{OpFunctionParameter(U32[1])}; | ||
| 635 | AddLabel(); | ||
| 636 | OpBranch(loop_header); | ||
| 637 | |||
| 638 | AddLabel(loop_header); | ||
| 639 | const Id word_offset{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 640 | const Id shift_offset{OpShiftLeftLogical(U32[1], offset, Const(3U))}; | ||
| 641 | const Id bit_offset{OpBitwiseAnd(U32[1], shift_offset, Const(mask))}; | ||
| 642 | const Id count{Const(size)}; | ||
| 643 | OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 644 | OpBranch(continue_block); | ||
| 645 | |||
| 646 | AddLabel(continue_block); | ||
| 647 | const Id word_pointer{OpAccessChain(shared_u32, shared_memory_u32, word_offset)}; | ||
| 648 | const Id old_value{OpLoad(U32[1], word_pointer)}; | ||
| 649 | const Id new_value{OpBitFieldInsert(U32[1], old_value, insert_value, bit_offset, count)}; | ||
| 650 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, Const(1U), u32_zero_value, | ||
| 651 | u32_zero_value, new_value, old_value)}; | ||
| 652 | const Id success{OpIEqual(U1, atomic_res, old_value)}; | ||
| 653 | OpBranchConditional(success, merge_block, loop_header); | ||
| 654 | |||
| 655 | AddLabel(merge_block); | ||
| 656 | OpReturn(); | ||
| 657 | OpFunctionEnd(); | ||
| 658 | return func; | ||
| 659 | }}; | ||
| 660 | if (program.info.uses_int8) { | ||
| 661 | shared_store_u8_func = make_function(24, 8); | ||
| 662 | } | ||
| 663 | if (program.info.uses_int16) { | ||
| 664 | shared_store_u16_func = make_function(16, 16); | ||
| 665 | } | ||
| 666 | } | ||
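// Without SPV_KHR_workgroup_memory_explicit_layout, shared memory is a plain
// u32 array, so sub-word stores are emulated. make_function(24, 8) and
// make_function(16, 16) generate roughly the following (a GLSL-style sketch
// of the 8-bit case):
//   void shared_store_u8(uint offset, uint value) {
//       uint word = offset >> 2;
//       uint bit = (offset << 3) & 24;
//       for (;;) {
//           uint old_value = smem[word];
//           uint new_value = bitfieldInsert(old_value, value, int(bit), 8);
//           if (atomicCompSwap(smem[word], old_value, new_value) == old_value)
//               return;
//       }
//   }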
| 667 | |||
| 668 | void EmitContext::DefineSharedMemoryFunctions(const IR::Program& program) { | ||
| 669 | if (program.info.uses_shared_increment) { | ||
| 670 | increment_cas_shared = CasLoop(*this, Operation::Increment, shared_memory_u32_type, | ||
| 671 | shared_u32, U32[1], U32[1], spv::Scope::Workgroup); | ||
| 672 | } | ||
| 673 | if (program.info.uses_shared_decrement) { | ||
| 674 | decrement_cas_shared = CasLoop(*this, Operation::Decrement, shared_memory_u32_type, | ||
| 675 | shared_u32, U32[1], U32[1], spv::Scope::Workgroup); | ||
| 676 | } | ||
| 677 | } | ||
| 678 | |||
| 679 | void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||
| 680 | const auto make_load{[&] { | ||
| 681 | const bool is_array{stage == Stage::Geometry}; | ||
| 682 | const Id end_block{OpLabel()}; | ||
| 683 | const Id default_label{OpLabel()}; | ||
| 684 | |||
| 685 | const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) | ||
| 686 | : TypeFunction(F32[1], U32[1])}; | ||
| 687 | const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; | ||
| 688 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 689 | const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; | ||
| 690 | |||
| 691 | AddLabel(); | ||
| 692 | const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 693 | const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; | ||
| 694 | const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; | ||
| 695 | std::vector<Sirit::Literal> literals; | ||
| 696 | std::vector<Id> labels; | ||
| 697 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 698 | literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); | ||
| 699 | labels.push_back(OpLabel()); | ||
| 700 | } | ||
| 701 | const u32 base_attribute_value{static_cast<u32>(IR::Attribute::Generic0X) >> 2}; | ||
| 702 | for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { | ||
| 703 | if (!info.loads.Generic(index)) { | ||
| 704 | continue; | ||
| 705 | } | ||
| 706 | literals.push_back(base_attribute_value + index); | ||
| 707 | labels.push_back(OpLabel()); | ||
| 708 | } | ||
| 709 | OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); | ||
| 710 | OpSwitch(compare_index, default_label, literals, labels); | ||
| 711 | AddLabel(default_label); | ||
| 712 | OpReturnValue(Const(0.0f)); | ||
| 713 | size_t label_index{0}; | ||
| 714 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 715 | AddLabel(labels[label_index]); | ||
| 716 | const Id pointer{is_array | ||
| 717 | ? OpAccessChain(input_f32, input_position, vertex, masked_index) | ||
| 718 | : OpAccessChain(input_f32, input_position, masked_index)}; | ||
| 719 | const Id result{OpLoad(F32[1], pointer)}; | ||
| 720 | OpReturnValue(result); | ||
| 721 | ++label_index; | ||
| 722 | } | ||
| 723 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 724 | if (!info.loads.Generic(index)) { | ||
| 725 | continue; | ||
| 726 | } | ||
| 727 | AddLabel(labels[label_index]); | ||
| 728 | const auto type{AttrTypes(*this, static_cast<u32>(index))}; | ||
| 729 | if (!type) { | ||
| 730 | OpReturnValue(Const(0.0f)); | ||
| 731 | ++label_index; | ||
| 732 | continue; | ||
| 733 | } | ||
| 734 | const Id generic_id{input_generics.at(index)}; | ||
| 735 | const Id pointer{is_array | ||
| 736 | ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) | ||
| 737 | : OpAccessChain(type->pointer, generic_id, masked_index)}; | ||
| 738 | const Id value{OpLoad(type->id, pointer)}; | ||
| 739 | const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; | ||
| 740 | OpReturnValue(result); | ||
| 741 | ++label_index; | ||
| 742 | } | ||
| 743 | AddLabel(end_block); | ||
| 744 | OpUnreachable(); | ||
| 745 | OpFunctionEnd(); | ||
| 746 | return func; | ||
| 747 | }}; | ||
| 748 | const auto make_store{[&] { | ||
| 749 | const Id end_block{OpLabel()}; | ||
| 750 | const Id default_label{OpLabel()}; | ||
| 751 | |||
| 752 | const Id func_type_store{TypeFunction(void_id, U32[1], F32[1])}; | ||
| 753 | const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type_store)}; | ||
| 754 | const Id offset{OpFunctionParameter(U32[1])}; | ||
| 755 | const Id store_value{OpFunctionParameter(F32[1])}; | ||
| 756 | AddLabel(); | ||
| 757 | const Id base_index{OpShiftRightArithmetic(U32[1], offset, Const(2U))}; | ||
| 758 | const Id masked_index{OpBitwiseAnd(U32[1], base_index, Const(3U))}; | ||
| 759 | const Id compare_index{OpShiftRightArithmetic(U32[1], base_index, Const(2U))}; | ||
| 760 | std::vector<Sirit::Literal> literals; | ||
| 761 | std::vector<Id> labels; | ||
| 762 | if (info.stores.AnyComponent(IR::Attribute::PositionX)) { | ||
| 763 | literals.push_back(static_cast<u32>(IR::Attribute::PositionX) >> 2); | ||
| 764 | labels.push_back(OpLabel()); | ||
| 765 | } | ||
| 766 | const u32 base_attribute_value{static_cast<u32>(IR::Attribute::Generic0X) >> 2}; | ||
| 767 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 768 | if (!info.stores.Generic(index)) { | ||
| 769 | continue; | ||
| 770 | } | ||
| 771 | literals.push_back(base_attribute_value + static_cast<u32>(index)); | ||
| 772 | labels.push_back(OpLabel()); | ||
| 773 | } | ||
| 774 | if (info.stores.ClipDistances()) { | ||
| 775 | literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance0) >> 2); | ||
| 776 | labels.push_back(OpLabel()); | ||
| 777 | literals.push_back(static_cast<u32>(IR::Attribute::ClipDistance4) >> 2); | ||
| 778 | labels.push_back(OpLabel()); | ||
| 779 | } | ||
| 780 | OpSelectionMerge(end_block, spv::SelectionControlMask::MaskNone); | ||
| 781 | OpSwitch(compare_index, default_label, literals, labels); | ||
| 782 | AddLabel(default_label); | ||
| 783 | OpReturn(); | ||
| 784 | size_t label_index{0}; | ||
| 785 | if (info.stores.AnyComponent(IR::Attribute::PositionX)) { | ||
| 786 | AddLabel(labels[label_index]); | ||
| 787 | const Id pointer{OpAccessChain(output_f32, output_position, masked_index)}; | ||
| 788 | OpStore(pointer, store_value); | ||
| 789 | OpReturn(); | ||
| 790 | ++label_index; | ||
| 791 | } | ||
| 792 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 793 | if (!info.stores.Generic(index)) { | ||
| 794 | continue; | ||
| 795 | } | ||
| 796 | if (output_generics[index][0].num_components != 4) { | ||
| 797 | throw NotImplementedException("Physical stores and transform feedbacks"); | ||
| 798 | } | ||
| 799 | AddLabel(labels[label_index]); | ||
| 800 | const Id generic_id{output_generics[index][0].id}; | ||
| 801 | const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)}; | ||
| 802 | OpStore(pointer, store_value); | ||
| 803 | OpReturn(); | ||
| 804 | ++label_index; | ||
| 805 | } | ||
| 806 | if (info.stores.ClipDistances()) { | ||
| 807 | AddLabel(labels[label_index]); | ||
| 808 | const Id pointer{OpAccessChain(output_f32, clip_distances, masked_index)}; | ||
| 809 | OpStore(pointer, store_value); | ||
| 810 | OpReturn(); | ||
| 811 | ++label_index; | ||
| 812 | AddLabel(labels[label_index]); | ||
| 813 | const Id fixed_index{OpIAdd(U32[1], masked_index, Const(4U))}; | ||
| 814 | const Id pointer2{OpAccessChain(output_f32, clip_distances, fixed_index)}; | ||
| 815 | OpStore(pointer2, store_value); | ||
| 816 | OpReturn(); | ||
| 817 | ++label_index; | ||
| 818 | } | ||
| 819 | AddLabel(end_block); | ||
| 820 | OpUnreachable(); | ||
| 821 | OpFunctionEnd(); | ||
| 822 | return func; | ||
| 823 | }}; | ||
| 824 | if (info.loads_indexed_attributes) { | ||
| 825 | indexed_load_func = make_load(); | ||
| 826 | } | ||
| 827 | if (info.stores_indexed_attributes) { | ||
| 828 | indexed_store_func = make_store(); | ||
| 829 | } | ||
| 830 | } | ||
| 831 | |||
| 832 | void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | ||
| 833 | if (!info.uses_global_memory || !profile.support_int64) { | ||
| 834 | return; | ||
| 835 | } | ||
| 836 | using DefPtr = Id StorageDefinitions::*; | ||
| 837 | const Id zero{u32_zero_value}; | ||
| 838 | const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, | ||
| 839 | auto&& callback) { | ||
| 840 | AddLabel(); | ||
| 841 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 842 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 843 | if (!info.nvn_buffer_used[index]) { | ||
| 844 | continue; | ||
| 845 | } | ||
| 846 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
| 847 | const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; | ||
| 848 | const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; | ||
| 849 | const Id ssbo_addr_pointer{OpAccessChain( | ||
| 850 | uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; | ||
| 851 | const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | ||
| 852 | zero, ssbo_size_cbuf_offset)}; | ||
| 853 | |||
| 854 | const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||
| 855 | const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | ||
| 856 | const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | ||
| 857 | const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | ||
| 858 | OpULessThan(U1, addr, ssbo_end))}; | ||
| 859 | const Id then_label{OpLabel()}; | ||
| 860 | const Id else_label{OpLabel()}; | ||
| 861 | OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); | ||
| 862 | OpBranchConditional(cond, then_label, else_label); | ||
| 863 | AddLabel(then_label); | ||
| 864 | const Id ssbo_id{ssbos[index].*ssbo_member}; | ||
| 865 | const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; | ||
| 866 | const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; | ||
| 867 | const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; | ||
| 868 | callback(ssbo_pointer); | ||
| 869 | AddLabel(else_label); | ||
| 870 | } | ||
| 871 | }}; | ||
| 872 | const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 873 | const Id function_type{TypeFunction(type, U64)}; | ||
| 874 | const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 875 | const Id addr{OpFunctionParameter(U64)}; | ||
| 876 | define_body(ssbo_member, addr, element_pointer, shift, | ||
| 877 | [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); | ||
| 878 | OpReturnValue(ConstantNull(type)); | ||
| 879 | OpFunctionEnd(); | ||
| 880 | return func_id; | ||
| 881 | }}; | ||
| 882 | const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 883 | const Id function_type{TypeFunction(void_id, U64, type)}; | ||
| 884 | const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 885 | const Id addr{OpFunctionParameter(U64)}; | ||
| 886 | const Id data{OpFunctionParameter(type)}; | ||
| 887 | define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { | ||
| 888 | OpStore(ssbo_pointer, data); | ||
| 889 | OpReturn(); | ||
| 890 | }); | ||
| 891 | OpReturn(); | ||
| 892 | OpFunctionEnd(); | ||
| 893 | return func_id; | ||
| 894 | }}; | ||
| 895 | const auto define{ | ||
| 896 | [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { | ||
| 897 | const Id element_type{type_def.element}; | ||
| 898 | const u32 shift{static_cast<u32>(std::countr_zero(size))}; | ||
| 899 | const Id load_func{define_load(ssbo_member, element_type, type, shift)}; | ||
| 900 | const Id write_func{define_write(ssbo_member, element_type, type, shift)}; | ||
| 901 | return std::make_pair(load_func, write_func); | ||
| 902 | }}; | ||
| 903 | std::tie(load_global_func_u32, write_global_func_u32) = | ||
| 904 | define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); | ||
| 905 | std::tie(load_global_func_u32x2, write_global_func_u32x2) = | ||
| 906 | define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); | ||
| 907 | std::tie(load_global_func_u32x4, write_global_func_u32x4) = | ||
| 908 | define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); | ||
| 909 | } | ||
| 910 | |||
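The load/store helpers defined above compile down to a linear scan over the shader's SSBO address ranges. A minimal host-side C++ sketch of the emitted control flow, using a hypothetical SsboRegion record (not part of this code) to stand in for the base/size pair read from the NVN constant buffer:

    #include <cstdint>
    #include <vector>

    // Hypothetical mirror of one NVN storage buffer: base and size are read
    // from the constant buffer at cbuf_offset, as in define_body above.
    struct SsboRegion {
        uint64_t base;
        uint64_t size;
        const uint32_t* data;
    };

    // Equivalent of the emitted u32 load function: scan every used SSBO, and
    // if the 64-bit address falls inside one, load from it; otherwise return
    // the OpConstantNull value (zero).
    uint32_t LoadGlobalU32(uint64_t addr, const std::vector<SsboRegion>& ssbos) {
        for (const SsboRegion& ssbo : ssbos) {
            if (addr >= ssbo.base && addr < ssbo.base + ssbo.size) {
                return ssbo.data[(addr - ssbo.base) >> 2]; // shift = countr_zero(sizeof(u32))
            }
        }
        return 0;
    }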
| 911 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | ||
| 912 | if (info.constant_buffer_descriptors.empty()) { | ||
| 913 | return; | ||
| 914 | } | ||
| 915 | if (!profile.support_descriptor_aliasing) { | ||
| 916 | DefineConstBuffers(*this, info, &UniformDefinitions::U32x4, binding, U32[4], 'u', | ||
| 917 | sizeof(u32[4])); | ||
| 918 | for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) { | ||
| 919 | binding += desc.count; | ||
| 920 | } | ||
| 921 | return; | ||
| 922 | } | ||
| 923 | IR::Type types{info.used_constant_buffer_types}; | ||
| 924 | if (True(types & IR::Type::U8)) { | ||
| 925 | if (profile.support_int8) { | ||
| 926 | DefineConstBuffers(*this, info, &UniformDefinitions::U8, binding, U8, 'u', sizeof(u8)); | ||
| 927 | DefineConstBuffers(*this, info, &UniformDefinitions::S8, binding, S8, 's', sizeof(s8)); | ||
| 928 | } else { | ||
| 929 | types |= IR::Type::U32; | ||
| 930 | } | ||
| 931 | } | ||
| 932 | if (True(types & IR::Type::U16)) { | ||
| 933 | if (profile.support_int16) { | ||
| 934 | DefineConstBuffers(*this, info, &UniformDefinitions::U16, binding, U16, 'u', | ||
| 935 | sizeof(u16)); | ||
| 936 | DefineConstBuffers(*this, info, &UniformDefinitions::S16, binding, S16, 's', | ||
| 937 | sizeof(s16)); | ||
| 938 | } else { | ||
| 939 | types |= IR::Type::U32; | ||
| 940 | } | ||
| 941 | } | ||
| 942 | if (True(types & IR::Type::U32)) { | ||
| 943 | DefineConstBuffers(*this, info, &UniformDefinitions::U32, binding, U32[1], 'u', | ||
| 944 | sizeof(u32)); | ||
| 945 | } | ||
| 946 | if (True(types & IR::Type::F32)) { | ||
| 947 | DefineConstBuffers(*this, info, &UniformDefinitions::F32, binding, F32[1], 'f', | ||
| 948 | sizeof(f32)); | ||
| 949 | } | ||
| 950 | if (True(types & IR::Type::U32x2)) { | ||
| 951 | DefineConstBuffers(*this, info, &UniformDefinitions::U32x2, binding, U32[2], 'u', | ||
| 952 | sizeof(u32[2])); | ||
| 953 | } | ||
| 954 | binding += static_cast<u32>(info.constant_buffer_descriptors.size()); | ||
| 955 | } | ||
| 956 | |||
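When descriptor aliasing is unsupported, every constant buffer is declared once as a u32x4 array and narrower reads are assembled from vec4 elements. One plausible reading of that addressing, as an illustrative sketch only (the exact element-select math lives in the access emitters, not shown here):

    #include <array>
    #include <cstdint>

    // Sketch: a scalar u32 read at `byte_offset` from a cbuf declared as an
    // array of u32x4 becomes a vec4 fetch plus a 32-bit lane select.
    uint32_t ReadCbufU32(const std::array<uint32_t, 4>* cbuf, uint32_t byte_offset) {
        const std::array<uint32_t, 4>& vec = cbuf[byte_offset / 16]; // 16 bytes per u32x4
        return vec[(byte_offset / 4) % 4];                           // pick the lane
    }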
| 957 | void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||
| 958 | if (info.storage_buffers_descriptors.empty()) { | ||
| 959 | return; | ||
| 960 | } | ||
| 961 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | ||
| 962 | |||
| 963 | const IR::Type used_types{profile.support_descriptor_aliasing ? info.used_storage_buffer_types | ||
| 964 | : IR::Type::U32}; | ||
| 965 | if (profile.support_int8 && True(used_types & IR::Type::U8)) { | ||
| 966 | DefineSsbos(*this, storage_types.U8, &StorageDefinitions::U8, info, binding, U8, | ||
| 967 | sizeof(u8)); | ||
| 968 | DefineSsbos(*this, storage_types.S8, &StorageDefinitions::S8, info, binding, S8, | ||
| 969 | sizeof(s8)); | ||

| 970 | } | ||
| 971 | if (profile.support_int16 && True(used_types & IR::Type::U16)) { | ||
| 972 | DefineSsbos(*this, storage_types.U16, &StorageDefinitions::U16, info, binding, U16, | ||
| 973 | sizeof(u16)); | ||
| 974 | DefineSsbos(*this, storage_types.S16, &StorageDefinitions::S16, info, binding, S16, | ||
| 975 | sizeof(s16)); | ||
| 976 | } | ||
| 977 | if (True(used_types & IR::Type::U32)) { | ||
| 978 | DefineSsbos(*this, storage_types.U32, &StorageDefinitions::U32, info, binding, U32[1], | ||
| 979 | sizeof(u32)); | ||
| 980 | } | ||
| 981 | if (True(used_types & IR::Type::F32)) { | ||
| 982 | DefineSsbos(*this, storage_types.F32, &StorageDefinitions::F32, info, binding, F32[1], | ||
| 983 | sizeof(f32)); | ||
| 984 | } | ||
| 985 | if (True(used_types & IR::Type::U64)) { | ||
| 986 | DefineSsbos(*this, storage_types.U64, &StorageDefinitions::U64, info, binding, U64, | ||
| 987 | sizeof(u64)); | ||
| 988 | } | ||
| 989 | if (True(used_types & IR::Type::U32x2)) { | ||
| 990 | DefineSsbos(*this, storage_types.U32x2, &StorageDefinitions::U32x2, info, binding, U32[2], | ||
| 991 | sizeof(u32[2])); | ||
| 992 | } | ||
| 993 | if (True(used_types & IR::Type::U32x4)) { | ||
| 994 | DefineSsbos(*this, storage_types.U32x4, &StorageDefinitions::U32x4, info, binding, U32[4], | ||
| 995 | sizeof(u32[4])); | ||
| 996 | } | ||
| 997 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||
| 998 | binding += desc.count; | ||
| 999 | } | ||
| 1000 | const bool needs_function{ | ||
| 1001 | info.uses_global_increment || info.uses_global_decrement || info.uses_atomic_f32_add || | ||
| 1002 | info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max || | ||
| 1003 | info.uses_atomic_f32x2_add || info.uses_atomic_f32x2_min || info.uses_atomic_f32x2_max}; | ||
| 1004 | if (needs_function) { | ||
| 1005 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 1006 | } | ||
| 1007 | if (info.uses_global_increment) { | ||
| 1008 | increment_cas_ssbo = CasLoop(*this, Operation::Increment, storage_types.U32.array, | ||
| 1009 | storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); | ||
| 1010 | } | ||
| 1011 | if (info.uses_global_decrement) { | ||
| 1012 | decrement_cas_ssbo = CasLoop(*this, Operation::Decrement, storage_types.U32.array, | ||
| 1013 | storage_types.U32.element, U32[1], U32[1], spv::Scope::Device); | ||
| 1014 | } | ||
| 1015 | if (info.uses_atomic_f32_add) { | ||
| 1016 | f32_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1017 | storage_types.U32.element, F32[1], U32[1], spv::Scope::Device); | ||
| 1018 | } | ||
| 1019 | if (info.uses_atomic_f16x2_add) { | ||
| 1020 | f16x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1021 | storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); | ||
| 1022 | } | ||
| 1023 | if (info.uses_atomic_f16x2_min) { | ||
| 1024 | f16x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, | ||
| 1025 | storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); | ||
| 1026 | } | ||
| 1027 | if (info.uses_atomic_f16x2_max) { | ||
| 1028 | f16x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, | ||
| 1029 | storage_types.U32.element, F16[2], F16[2], spv::Scope::Device); | ||
| 1030 | } | ||
| 1031 | if (info.uses_atomic_f32x2_add) { | ||
| 1032 | f32x2_add_cas = CasLoop(*this, Operation::FPAdd, storage_types.U32.array, | ||
| 1033 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1034 | } | ||
| 1035 | if (info.uses_atomic_f32x2_min) { | ||
| 1036 | f32x2_min_cas = CasLoop(*this, Operation::FPMin, storage_types.U32.array, | ||
| 1037 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1038 | } | ||
| 1039 | if (info.uses_atomic_f32x2_max) { | ||
| 1040 | f32x2_max_cas = CasLoop(*this, Operation::FPMax, storage_types.U32.array, | ||
| 1041 | storage_types.U32.element, F32[2], F32[2], spv::Scope::Device); | ||
| 1042 | } | ||
| 1043 | } | ||
| 1044 | |||
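CasLoop (defined earlier in this file) emulates the float and packed-half atomics wired up above with a compare-exchange retry loop over the SSBO's u32 words, which is why VariablePointersStorageBuffer is required. A host-side C++20 analogue of the pattern, as a sketch rather than the emitted SPIR-V:

    #include <atomic>
    #include <bit>
    #include <cstdint>

    // Emulated atomic float add: reinterpret the 32-bit word, apply the op,
    // and retry until no other thread raced the store. Like the hardware
    // atomic, it returns the previous value.
    float AtomicFAdd(std::atomic<uint32_t>& word, float value) {
        uint32_t expected = word.load();
        while (true) {
            const float old_value = std::bit_cast<float>(expected);
            const uint32_t desired = std::bit_cast<uint32_t>(old_value + value);
            if (word.compare_exchange_weak(expected, desired)) {
                return old_value;
            }
            // compare_exchange_weak reloaded `expected`; loop and retry.
        }
    }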
| 1045 | void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { | ||
| 1046 | if (info.texture_buffer_descriptors.empty()) { | ||
| 1047 | return; | ||
| 1048 | } | ||
| 1049 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||
| 1050 | image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); | ||
| 1051 | sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); | ||
| 1052 | |||
| 1053 | const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; | ||
| 1054 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | ||
| 1055 | for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { | ||
| 1056 | if (desc.count != 1) { | ||
| 1057 | throw NotImplementedException("Array of texture buffers"); | ||
| 1058 | } | ||
| 1059 | const Id id{AddGlobalVariable(type, spv::StorageClass::UniformConstant)}; | ||
| 1060 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1061 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1062 | Name(id, NameOf(stage, desc, "texbuf")); | ||
| 1063 | texture_buffers.push_back({ | ||
| 1064 | .id = id, | ||
| 1065 | .count = desc.count, | ||
| 1066 | }); | ||
| 1067 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1068 | interfaces.push_back(id); | ||
| 1069 | } | ||
| 1070 | ++binding; | ||
| 1071 | } | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { | ||
| 1075 | image_buffers.reserve(info.image_buffer_descriptors.size()); | ||
| 1076 | for (const ImageBufferDescriptor& desc : info.image_buffer_descriptors) { | ||
| 1077 | if (desc.count != 1) { | ||
| 1078 | throw NotImplementedException("Array of image buffers"); | ||
| 1079 | } | ||
| 1080 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | ||
| 1081 | const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, 0U, false, false, 2, format)}; | ||
| 1082 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; | ||
| 1083 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||
| 1084 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1085 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1086 | Name(id, NameOf(stage, desc, "imgbuf")); | ||
| 1087 | image_buffers.push_back({ | ||
| 1088 | .id = id, | ||
| 1089 | .image_type = image_type, | ||
| 1090 | .count = desc.count, | ||
| 1091 | }); | ||
| 1092 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1093 | interfaces.push_back(id); | ||
| 1094 | } | ||
| 1095 | ++binding; | ||
| 1096 | } | ||
| 1097 | } | ||
| 1098 | |||
| 1099 | void EmitContext::DefineTextures(const Info& info, u32& binding) { | ||
| 1100 | textures.reserve(info.texture_descriptors.size()); | ||
| 1101 | for (const TextureDescriptor& desc : info.texture_descriptors) { | ||
| 1102 | const Id image_type{ImageType(*this, desc)}; | ||
| 1103 | const Id sampled_type{TypeSampledImage(image_type)}; | ||
| 1104 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, sampled_type)}; | ||
| 1105 | const Id desc_type{DescType(*this, sampled_type, pointer_type, desc.count)}; | ||
| 1106 | const Id id{AddGlobalVariable(desc_type, spv::StorageClass::UniformConstant)}; | ||
| 1107 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1108 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1109 | Name(id, NameOf(stage, desc, "tex")); | ||
| 1110 | textures.push_back({ | ||
| 1111 | .id = id, | ||
| 1112 | .sampled_type = sampled_type, | ||
| 1113 | .pointer_type = pointer_type, | ||
| 1114 | .image_type = image_type, | ||
| 1115 | .count = desc.count, | ||
| 1116 | }); | ||
| 1117 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1118 | interfaces.push_back(id); | ||
| 1119 | } | ||
| 1120 | ++binding; | ||
| 1121 | } | ||
| 1122 | if (info.uses_atomic_image_u32) { | ||
| 1123 | image_u32 = TypePointer(spv::StorageClass::Image, U32[1]); | ||
| 1124 | } | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | void EmitContext::DefineImages(const Info& info, u32& binding) { | ||
| 1128 | images.reserve(info.image_descriptors.size()); | ||
| 1129 | for (const ImageDescriptor& desc : info.image_descriptors) { | ||
| 1130 | if (desc.count != 1) { | ||
| 1131 | throw NotImplementedException("Array of images"); | ||
| 1132 | } | ||
| 1133 | const Id image_type{ImageType(*this, desc)}; | ||
| 1134 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; | ||
| 1135 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | ||
| 1136 | Decorate(id, spv::Decoration::Binding, binding); | ||
| 1137 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||
| 1138 | Name(id, NameOf(stage, desc, "img")); | ||
| 1139 | images.push_back({ | ||
| 1140 | .id = id, | ||
| 1141 | .image_type = image_type, | ||
| 1142 | .count = desc.count, | ||
| 1143 | }); | ||
| 1144 | if (profile.supported_spirv >= 0x00010400) { | ||
| 1145 | interfaces.push_back(id); | ||
| 1146 | } | ||
| 1147 | ++binding; | ||
| 1148 | } | ||
| 1149 | } | ||
| 1150 | |||
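All of the resource helpers above thread a single u32 binding counter, so every resource a shader uses lands in descriptor set 0 with a unique sequential binding. A sketch of the accumulation, with made-up resource counts for illustration (the call order matches the private method list in the header):

    // Sketch only; counts are hypothetical.
    uint32_t binding = 0;   // DefineConstantBuffers: 2 cbufs -> bindings 0..1
                            // DefineStorageBuffers:  3 ssbos -> bindings 2..4
                            // DefineTextureBuffers, DefineImageBuffers,
                            // DefineTextures, DefineImages continue from 5,
                            // each advancing `binding` by its descriptor count.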
| 1151 | void EmitContext::DefineInputs(const IR::Program& program) { | ||
| 1152 | const Info& info{program.info}; | ||
| 1153 | const VaryingState loads{info.loads.mask | info.passthrough.mask}; | ||
| 1154 | |||
| 1155 | if (info.uses_workgroup_id) { | ||
| 1156 | workgroup_id = DefineInput(*this, U32[3], false, spv::BuiltIn::WorkgroupId); | ||
| 1157 | } | ||
| 1158 | if (info.uses_local_invocation_id) { | ||
| 1159 | local_invocation_id = DefineInput(*this, U32[3], false, spv::BuiltIn::LocalInvocationId); | ||
| 1160 | } | ||
| 1161 | if (info.uses_invocation_id) { | ||
| 1162 | invocation_id = DefineInput(*this, U32[1], false, spv::BuiltIn::InvocationId); | ||
| 1163 | } | ||
| 1164 | if (info.uses_sample_id) { | ||
| 1165 | sample_id = DefineInput(*this, U32[1], false, spv::BuiltIn::SampleId); | ||
| 1166 | } | ||
| 1167 | if (info.uses_is_helper_invocation) { | ||
| 1168 | is_helper_invocation = DefineInput(*this, U1, false, spv::BuiltIn::HelperInvocation); | ||
| 1169 | } | ||
| 1170 | if (info.uses_subgroup_mask) { | ||
| 1171 | subgroup_mask_eq = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupEqMaskKHR); | ||
| 1172 | subgroup_mask_lt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLtMaskKHR); | ||
| 1173 | subgroup_mask_le = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupLeMaskKHR); | ||
| 1174 | subgroup_mask_gt = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGtMaskKHR); | ||
| 1175 | subgroup_mask_ge = DefineInput(*this, U32[4], false, spv::BuiltIn::SubgroupGeMaskKHR); | ||
| 1176 | } | ||
| 1177 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_shuffles || | ||
| 1178 | (profile.warp_size_potentially_larger_than_guest && | ||
| 1179 | (info.uses_subgroup_vote || info.uses_subgroup_mask))) { | ||
| 1180 | subgroup_local_invocation_id = | ||
| 1181 | DefineInput(*this, U32[1], false, spv::BuiltIn::SubgroupLocalInvocationId); | ||
| 1182 | } | ||
| 1183 | if (info.uses_fswzadd) { | ||
| 1184 | const Id f32_one{Const(1.0f)}; | ||
| 1185 | const Id f32_minus_one{Const(-1.0f)}; | ||
| 1186 | const Id f32_zero{Const(0.0f)}; | ||
| 1187 | fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero); | ||
| 1188 | fswzadd_lut_b = | ||
| 1189 | ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one); | ||
| 1190 | } | ||
| 1191 | if (loads[IR::Attribute::PrimitiveId]) { | ||
| 1192 | primitive_id = DefineInput(*this, U32[1], false, spv::BuiltIn::PrimitiveId); | ||
| 1193 | } | ||
| 1194 | if (loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 1195 | const bool is_fragment{stage == Stage::Fragment}; | ||
| 1196 | const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord : spv::BuiltIn::Position}; | ||
| 1197 | input_position = DefineInput(*this, F32[4], true, built_in); | ||
| 1198 | if (profile.support_geometry_shader_passthrough) { | ||
| 1199 | if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 1200 | Decorate(input_position, spv::Decoration::PassthroughNV); | ||
| 1201 | } | ||
| 1202 | } | ||
| 1203 | } | ||
| 1204 | if (loads[IR::Attribute::InstanceId]) { | ||
| 1205 | if (profile.support_vertex_instance_id) { | ||
| 1206 | instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); | ||
| 1207 | } else { | ||
| 1208 | instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); | ||
| 1209 | base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||
| 1210 | } | ||
| 1211 | } | ||
| 1212 | if (loads[IR::Attribute::VertexId]) { | ||
| 1213 | if (profile.support_vertex_instance_id) { | ||
| 1214 | vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); | ||
| 1215 | } else { | ||
| 1216 | vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); | ||
| 1217 | base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||
| 1218 | } | ||
| 1219 | } | ||
| 1220 | if (loads[IR::Attribute::FrontFace]) { | ||
| 1221 | front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); | ||
| 1222 | } | ||
| 1223 | if (loads[IR::Attribute::PointSpriteS] || loads[IR::Attribute::PointSpriteT]) { | ||
| 1224 | point_coord = DefineInput(*this, F32[2], true, spv::BuiltIn::PointCoord); | ||
| 1225 | } | ||
| 1226 | if (loads[IR::Attribute::TessellationEvaluationPointU] || | ||
| 1227 | loads[IR::Attribute::TessellationEvaluationPointV]) { | ||
| 1228 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); | ||
| 1229 | } | ||
| 1230 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 1231 | const AttributeType input_type{runtime_info.generic_input_types[index]}; | ||
| 1232 | if (!runtime_info.previous_stage_stores.Generic(index)) { | ||
| 1233 | continue; | ||
| 1234 | } | ||
| 1235 | if (!loads.Generic(index)) { | ||
| 1236 | continue; | ||
| 1237 | } | ||
| 1238 | if (input_type == AttributeType::Disabled) { | ||
| 1239 | continue; | ||
| 1240 | } | ||
| 1241 | const Id type{GetAttributeType(*this, input_type)}; | ||
| 1242 | const Id id{DefineInput(*this, type, true)}; | ||
| 1243 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1244 | Name(id, fmt::format("in_attr{}", index)); | ||
| 1245 | input_generics[index] = id; | ||
| 1246 | |||
| 1247 | if (info.passthrough.Generic(index) && profile.support_geometry_shader_passthrough) { | ||
| 1248 | Decorate(id, spv::Decoration::PassthroughNV); | ||
| 1249 | } | ||
| 1250 | if (stage != Stage::Fragment) { | ||
| 1251 | continue; | ||
| 1252 | } | ||
| 1253 | switch (info.interpolation[index]) { | ||
| 1254 | case Interpolation::Smooth: | ||
| 1255 | // Smooth is the default interpolation, so no decoration is emitted | ||
| 1256 | // Decorate(id, spv::Decoration::Smooth); | ||
| 1257 | break; | ||
| 1258 | case Interpolation::NoPerspective: | ||
| 1259 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 1260 | break; | ||
| 1261 | case Interpolation::Flat: | ||
| 1262 | Decorate(id, spv::Decoration::Flat); | ||
| 1263 | break; | ||
| 1264 | } | ||
| 1265 | } | ||
| 1266 | if (stage == Stage::TessellationEval) { | ||
| 1267 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 1268 | if (!info.uses_patches[index]) { | ||
| 1269 | continue; | ||
| 1270 | } | ||
| 1271 | const Id id{DefineInput(*this, F32[4], false)}; | ||
| 1272 | Decorate(id, spv::Decoration::Patch); | ||
| 1273 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1274 | patches[index] = id; | ||
| 1275 | } | ||
| 1276 | } | ||
| 1277 | } | ||
| 1278 | |||
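The two fswzadd_lut constants built above hold the per-mode sign factors for the Maxwell FSWZADD instruction; the warp emitter indexes them with each invocation's swizzle bits. A scalar C++ sketch of the assumed semantics (the mode extraction from the swizzle operand is simplified here):

    // Assumed semantics, for illustration: mode 0..3 selects one sign pair
    // from the LUTs and the result is a * lut_a[mode] + b * lut_b[mode].
    float Fswzadd(float a, float b, unsigned mode) {
        constexpr float lut_a[4] = {-1.0f, 1.0f, -1.0f, 0.0f};
        constexpr float lut_b[4] = {-1.0f, -1.0f, 1.0f, -1.0f};
        return a * lut_a[mode & 3] + b * lut_b[mode & 3];
    }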
| 1279 | void EmitContext::DefineOutputs(const IR::Program& program) { | ||
| 1280 | const Info& info{program.info}; | ||
| 1281 | const std::optional<u32> invocations{program.invocations}; | ||
| 1282 | if (info.stores.AnyComponent(IR::Attribute::PositionX) || stage == Stage::VertexB) { | ||
| 1283 | output_position = DefineOutput(*this, F32[4], invocations, spv::BuiltIn::Position); | ||
| 1284 | } | ||
| 1285 | if (info.stores[IR::Attribute::PointSize] || runtime_info.fixed_state_point_size) { | ||
| 1286 | if (stage == Stage::Fragment) { | ||
| 1287 | throw NotImplementedException("Storing PointSize in fragment stage"); | ||
| 1288 | } | ||
| 1289 | output_point_size = DefineOutput(*this, F32[1], invocations, spv::BuiltIn::PointSize); | ||
| 1290 | } | ||
| 1291 | if (info.stores.ClipDistances()) { | ||
| 1292 | if (stage == Stage::Fragment) { | ||
| 1293 | throw NotImplementedException("Storing ClipDistance in fragment stage"); | ||
| 1294 | } | ||
| 1295 | const Id type{TypeArray(F32[1], Const(8U))}; | ||
| 1296 | clip_distances = DefineOutput(*this, type, invocations, spv::BuiltIn::ClipDistance); | ||
| 1297 | } | ||
| 1298 | if (info.stores[IR::Attribute::Layer] && | ||
| 1299 | (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { | ||
| 1300 | if (stage == Stage::Fragment) { | ||
| 1301 | throw NotImplementedException("Storing Layer in fragment stage"); | ||
| 1302 | } | ||
| 1303 | layer = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::Layer); | ||
| 1304 | } | ||
| 1305 | if (info.stores[IR::Attribute::ViewportIndex] && | ||
| 1306 | (profile.support_viewport_index_layer_non_geometry || stage == Stage::Geometry)) { | ||
| 1307 | if (stage == Stage::Fragment) { | ||
| 1308 | throw NotImplementedException("Storing ViewportIndex in fragment stage"); | ||
| 1309 | } | ||
| 1310 | viewport_index = DefineOutput(*this, U32[1], invocations, spv::BuiltIn::ViewportIndex); | ||
| 1311 | } | ||
| 1312 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 1313 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, | ||
| 1314 | spv::BuiltIn::ViewportMaskNV); | ||
| 1315 | } | ||
| 1316 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 1317 | if (info.stores.Generic(index)) { | ||
| 1318 | DefineGenericOutput(*this, index, invocations); | ||
| 1319 | } | ||
| 1320 | } | ||
| 1321 | switch (stage) { | ||
| 1322 | case Stage::TessellationControl: | ||
| 1323 | if (info.stores_tess_level_outer) { | ||
| 1324 | const Id type{TypeArray(F32[1], Const(4U))}; | ||
| 1325 | output_tess_level_outer = | ||
| 1326 | DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelOuter); | ||
| 1327 | Decorate(output_tess_level_outer, spv::Decoration::Patch); | ||
| 1328 | } | ||
| 1329 | if (info.stores_tess_level_inner) { | ||
| 1330 | const Id type{TypeArray(F32[1], Const(2U))}; | ||
| 1331 | output_tess_level_inner = | ||
| 1332 | DefineOutput(*this, type, std::nullopt, spv::BuiltIn::TessLevelInner); | ||
| 1333 | Decorate(output_tess_level_inner, spv::Decoration::Patch); | ||
| 1334 | } | ||
| 1335 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 1336 | if (!info.uses_patches[index]) { | ||
| 1337 | continue; | ||
| 1338 | } | ||
| 1339 | const Id id{DefineOutput(*this, F32[4], std::nullopt)}; | ||
| 1340 | Decorate(id, spv::Decoration::Patch); | ||
| 1341 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | ||
| 1342 | patches[index] = id; | ||
| 1343 | } | ||
| 1344 | break; | ||
| 1345 | case Stage::Fragment: | ||
| 1346 | for (u32 index = 0; index < 8; ++index) { | ||
| 1347 | if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { | ||
| 1348 | continue; | ||
| 1349 | } | ||
| 1350 | frag_color[index] = DefineOutput(*this, F32[4], std::nullopt); | ||
| 1351 | Decorate(frag_color[index], spv::Decoration::Location, index); | ||
| 1352 | Name(frag_color[index], fmt::format("frag_color{}", index)); | ||
| 1353 | } | ||
| 1354 | if (info.stores_frag_depth) { | ||
| 1355 | frag_depth = DefineOutput(*this, F32[1], std::nullopt); | ||
| 1356 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); | ||
| 1357 | } | ||
| 1358 | if (info.stores_sample_mask) { | ||
| 1359 | sample_mask = DefineOutput(*this, U32[1], std::nullopt); | ||
| 1360 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); | ||
| 1361 | } | ||
| 1362 | break; | ||
| 1363 | default: | ||
| 1364 | break; | ||
| 1365 | } | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h new file mode 100644 index 000000000..e277bc358 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -0,0 +1,307 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include <sirit/sirit.h> | ||
| 11 | |||
| 12 | #include "shader_recompiler/backend/bindings.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 14 | #include "shader_recompiler/profile.h" | ||
| 15 | #include "shader_recompiler/runtime_info.h" | ||
| 16 | #include "shader_recompiler/shader_info.h" | ||
| 17 | |||
| 18 | namespace Shader::Backend::SPIRV { | ||
| 19 | |||
| 20 | using Sirit::Id; | ||
| 21 | |||
| 22 | class VectorTypes { | ||
| 23 | public: | ||
| 24 | void Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name); | ||
| 25 | |||
| 26 | [[nodiscard]] Id operator[](size_t size) const noexcept { | ||
| 27 | return defs[size - 1]; | ||
| 28 | } | ||
| 29 | |||
| 30 | private: | ||
| 31 | std::array<Id, 4> defs{}; | ||
| 32 | }; | ||
| 33 | |||
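VectorTypes is indexed by component count rather than by a zero-based index, which is why operator[] reads defs[size - 1]. A usage sketch with names from this header:

    // ctx.F32[1] -> the scalar f32 type; ctx.F32[4] -> the 4-component vector.
    // Valid sizes are 1 through 4; anything else indexes out of bounds.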
| 34 | struct TextureDefinition { | ||
| 35 | Id id; | ||
| 36 | Id sampled_type; | ||
| 37 | Id pointer_type; | ||
| 38 | Id image_type; | ||
| 39 | u32 count; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct TextureBufferDefinition { | ||
| 43 | Id id; | ||
| 44 | u32 count; | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct ImageBufferDefinition { | ||
| 48 | Id id; | ||
| 49 | Id image_type; | ||
| 50 | u32 count; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct ImageDefinition { | ||
| 54 | Id id; | ||
| 55 | Id image_type; | ||
| 56 | u32 count; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct UniformDefinitions { | ||
| 60 | Id U8{}; | ||
| 61 | Id S8{}; | ||
| 62 | Id U16{}; | ||
| 63 | Id S16{}; | ||
| 64 | Id U32{}; | ||
| 65 | Id F32{}; | ||
| 66 | Id U32x2{}; | ||
| 67 | Id U32x4{}; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct StorageTypeDefinition { | ||
| 71 | Id array{}; | ||
| 72 | Id element{}; | ||
| 73 | }; | ||
| 74 | |||
| 75 | struct StorageTypeDefinitions { | ||
| 76 | StorageTypeDefinition U8{}; | ||
| 77 | StorageTypeDefinition S8{}; | ||
| 78 | StorageTypeDefinition U16{}; | ||
| 79 | StorageTypeDefinition S16{}; | ||
| 80 | StorageTypeDefinition U32{}; | ||
| 81 | StorageTypeDefinition U64{}; | ||
| 82 | StorageTypeDefinition F32{}; | ||
| 83 | StorageTypeDefinition U32x2{}; | ||
| 84 | StorageTypeDefinition U32x4{}; | ||
| 85 | }; | ||
| 86 | |||
| 87 | struct StorageDefinitions { | ||
| 88 | Id U8{}; | ||
| 89 | Id S8{}; | ||
| 90 | Id U16{}; | ||
| 91 | Id S16{}; | ||
| 92 | Id U32{}; | ||
| 93 | Id F32{}; | ||
| 94 | Id U64{}; | ||
| 95 | Id U32x2{}; | ||
| 96 | Id U32x4{}; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct GenericElementInfo { | ||
| 100 | Id id{}; | ||
| 101 | u32 first_element{}; | ||
| 102 | u32 num_components{}; | ||
| 103 | }; | ||
| 104 | |||
| 105 | class EmitContext final : public Sirit::Module { | ||
| 106 | public: | ||
| 107 | explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 108 | IR::Program& program, Bindings& binding); | ||
| 109 | ~EmitContext(); | ||
| 110 | |||
| 111 | [[nodiscard]] Id Def(const IR::Value& value); | ||
| 112 | |||
| 113 | [[nodiscard]] Id BitOffset8(const IR::Value& offset); | ||
| 114 | [[nodiscard]] Id BitOffset16(const IR::Value& offset); | ||
| 115 | |||
| 116 | Id Const(u32 value) { | ||
| 117 | return Constant(U32[1], value); | ||
| 118 | } | ||
| 119 | |||
| 120 | Id Const(u32 element_1, u32 element_2) { | ||
| 121 | return ConstantComposite(U32[2], Const(element_1), Const(element_2)); | ||
| 122 | } | ||
| 123 | |||
| 124 | Id Const(u32 element_1, u32 element_2, u32 element_3) { | ||
| 125 | return ConstantComposite(U32[3], Const(element_1), Const(element_2), Const(element_3)); | ||
| 126 | } | ||
| 127 | |||
| 128 | Id Const(u32 element_1, u32 element_2, u32 element_3, u32 element_4) { | ||
| 129 | return ConstantComposite(U32[4], Const(element_1), Const(element_2), Const(element_3), | ||
| 130 | Const(element_4)); | ||
| 131 | } | ||
| 132 | |||
| 133 | Id SConst(s32 value) { | ||
| 134 | return Constant(S32[1], value); | ||
| 135 | } | ||
| 136 | |||
| 137 | Id SConst(s32 element_1, s32 element_2) { | ||
| 138 | return ConstantComposite(S32[2], SConst(element_1), SConst(element_2)); | ||
| 139 | } | ||
| 140 | |||
| 141 | Id SConst(s32 element_1, s32 element_2, s32 element_3) { | ||
| 142 | return ConstantComposite(S32[3], SConst(element_1), SConst(element_2), SConst(element_3)); | ||
| 143 | } | ||
| 144 | |||
| 145 | Id SConst(s32 element_1, s32 element_2, s32 element_3, s32 element_4) { | ||
| 146 | return ConstantComposite(S32[4], SConst(element_1), SConst(element_2), SConst(element_3), | ||
| 147 | SConst(element_4)); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id Const(f32 value) { | ||
| 151 | return Constant(F32[1], value); | ||
| 152 | } | ||
| 153 | |||
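The Const/SConst overloads above wrap Sirit's Constant and ConstantComposite for the common scalar and small-vector cases. A usage sketch with a hypothetical EmitContext instance named ctx:

    // const Id zero{ctx.Const(0U)};            // scalar u32 constant
    // const Id extent{ctx.Const(640U, 480U)};  // u32x2 composite constant
    // const Id half{ctx.Const(0.5f)};          // scalar f32 constant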
| 154 | const Profile& profile; | ||
| 155 | const RuntimeInfo& runtime_info; | ||
| 156 | Stage stage{}; | ||
| 157 | |||
| 158 | Id void_id{}; | ||
| 159 | Id U1{}; | ||
| 160 | Id U8{}; | ||
| 161 | Id S8{}; | ||
| 162 | Id U16{}; | ||
| 163 | Id S16{}; | ||
| 164 | Id U64{}; | ||
| 165 | VectorTypes F32; | ||
| 166 | VectorTypes U32; | ||
| 167 | VectorTypes S32; | ||
| 168 | VectorTypes F16; | ||
| 169 | VectorTypes F64; | ||
| 170 | |||
| 171 | Id true_value{}; | ||
| 172 | Id false_value{}; | ||
| 173 | Id u32_zero_value{}; | ||
| 174 | Id f32_zero_value{}; | ||
| 175 | |||
| 176 | UniformDefinitions uniform_types; | ||
| 177 | StorageTypeDefinitions storage_types; | ||
| 178 | |||
| 179 | Id private_u32{}; | ||
| 180 | |||
| 181 | Id shared_u8{}; | ||
| 182 | Id shared_u16{}; | ||
| 183 | Id shared_u32{}; | ||
| 184 | Id shared_u64{}; | ||
| 185 | Id shared_u32x2{}; | ||
| 186 | Id shared_u32x4{}; | ||
| 187 | |||
| 188 | Id input_f32{}; | ||
| 189 | Id input_u32{}; | ||
| 190 | Id input_s32{}; | ||
| 191 | |||
| 192 | Id output_f32{}; | ||
| 193 | Id output_u32{}; | ||
| 194 | |||
| 195 | Id image_buffer_type{}; | ||
| 196 | Id sampled_texture_buffer_type{}; | ||
| 197 | Id image_u32{}; | ||
| 198 | |||
| 199 | std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; | ||
| 200 | std::array<StorageDefinitions, Info::MAX_SSBOS> ssbos{}; | ||
| 201 | std::vector<TextureBufferDefinition> texture_buffers; | ||
| 202 | std::vector<ImageBufferDefinition> image_buffers; | ||
| 203 | std::vector<TextureDefinition> textures; | ||
| 204 | std::vector<ImageDefinition> images; | ||
| 205 | |||
| 206 | Id workgroup_id{}; | ||
| 207 | Id local_invocation_id{}; | ||
| 208 | Id invocation_id{}; | ||
| 209 | Id sample_id{}; | ||
| 210 | Id is_helper_invocation{}; | ||
| 211 | Id subgroup_local_invocation_id{}; | ||
| 212 | Id subgroup_mask_eq{}; | ||
| 213 | Id subgroup_mask_lt{}; | ||
| 214 | Id subgroup_mask_le{}; | ||
| 215 | Id subgroup_mask_gt{}; | ||
| 216 | Id subgroup_mask_ge{}; | ||
| 217 | Id instance_id{}; | ||
| 218 | Id instance_index{}; | ||
| 219 | Id base_instance{}; | ||
| 220 | Id vertex_id{}; | ||
| 221 | Id vertex_index{}; | ||
| 222 | Id base_vertex{}; | ||
| 223 | Id front_face{}; | ||
| 224 | Id point_coord{}; | ||
| 225 | Id tess_coord{}; | ||
| 226 | Id clip_distances{}; | ||
| 227 | Id layer{}; | ||
| 228 | Id viewport_index{}; | ||
| 229 | Id viewport_mask{}; | ||
| 230 | Id primitive_id{}; | ||
| 231 | |||
| 232 | Id fswzadd_lut_a{}; | ||
| 233 | Id fswzadd_lut_b{}; | ||
| 234 | |||
| 235 | Id indexed_load_func{}; | ||
| 236 | Id indexed_store_func{}; | ||
| 237 | |||
| 238 | Id local_memory{}; | ||
| 239 | |||
| 240 | Id shared_memory_u8{}; | ||
| 241 | Id shared_memory_u16{}; | ||
| 242 | Id shared_memory_u32{}; | ||
| 243 | Id shared_memory_u64{}; | ||
| 244 | Id shared_memory_u32x2{}; | ||
| 245 | Id shared_memory_u32x4{}; | ||
| 246 | |||
| 247 | Id shared_memory_u32_type{}; | ||
| 248 | |||
| 249 | Id shared_store_u8_func{}; | ||
| 250 | Id shared_store_u16_func{}; | ||
| 251 | Id increment_cas_shared{}; | ||
| 252 | Id increment_cas_ssbo{}; | ||
| 253 | Id decrement_cas_shared{}; | ||
| 254 | Id decrement_cas_ssbo{}; | ||
| 255 | Id f32_add_cas{}; | ||
| 256 | Id f16x2_add_cas{}; | ||
| 257 | Id f16x2_min_cas{}; | ||
| 258 | Id f16x2_max_cas{}; | ||
| 259 | Id f32x2_add_cas{}; | ||
| 260 | Id f32x2_min_cas{}; | ||
| 261 | Id f32x2_max_cas{}; | ||
| 262 | |||
| 263 | Id load_global_func_u32{}; | ||
| 264 | Id load_global_func_u32x2{}; | ||
| 265 | Id load_global_func_u32x4{}; | ||
| 266 | Id write_global_func_u32{}; | ||
| 267 | Id write_global_func_u32x2{}; | ||
| 268 | Id write_global_func_u32x4{}; | ||
| 269 | |||
| 270 | Id input_position{}; | ||
| 271 | std::array<Id, 32> input_generics{}; | ||
| 272 | |||
| 273 | Id output_point_size{}; | ||
| 274 | Id output_position{}; | ||
| 275 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | ||
| 276 | |||
| 277 | Id output_tess_level_outer{}; | ||
| 278 | Id output_tess_level_inner{}; | ||
| 279 | std::array<Id, 30> patches{}; | ||
| 280 | |||
| 281 | std::array<Id, 8> frag_color{}; | ||
| 282 | Id sample_mask{}; | ||
| 283 | Id frag_depth{}; | ||
| 284 | |||
| 285 | std::vector<Id> interfaces; | ||
| 286 | |||
| 287 | private: | ||
| 288 | void DefineCommonTypes(const Info& info); | ||
| 289 | void DefineCommonConstants(); | ||
| 290 | void DefineInterfaces(const IR::Program& program); | ||
| 291 | void DefineLocalMemory(const IR::Program& program); | ||
| 292 | void DefineSharedMemory(const IR::Program& program); | ||
| 293 | void DefineSharedMemoryFunctions(const IR::Program& program); | ||
| 294 | void DefineConstantBuffers(const Info& info, u32& binding); | ||
| 295 | void DefineStorageBuffers(const Info& info, u32& binding); | ||
| 296 | void DefineTextureBuffers(const Info& info, u32& binding); | ||
| 297 | void DefineImageBuffers(const Info& info, u32& binding); | ||
| 298 | void DefineTextures(const Info& info, u32& binding); | ||
| 299 | void DefineImages(const Info& info, u32& binding); | ||
| 300 | void DefineAttributeMemAccess(const Info& info); | ||
| 301 | void DefineGlobalMemoryFunctions(const Info& info); | ||
| 302 | |||
| 303 | void DefineInputs(const IR::Program& program); | ||
| 304 | void DefineOutputs(const IR::Program& program); | ||
| 305 | }; | ||
| 306 | |||
| 307 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp new file mode 100644 index 000000000..d7a86e270 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -0,0 +1,541 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <span> | ||
| 6 | #include <tuple> | ||
| 7 | #include <type_traits> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/settings.h" | ||
| 12 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 13 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::SPIRV { | ||
| 18 | namespace { | ||
| 19 | template <class Func> | ||
| 20 | struct FuncTraits {}; | ||
| 21 | |||
| 22 | template <class ReturnType_, class... Args> | ||
| 23 | struct FuncTraits<ReturnType_ (*)(Args...)> { | ||
| 24 | using ReturnType = ReturnType_; | ||
| 25 | |||
| 26 | static constexpr size_t NUM_ARGS = sizeof...(Args); | ||
| 27 | |||
| 28 | template <size_t I> | ||
| 29 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 30 | }; | ||
| 31 | |||
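FuncTraits is the usual function-pointer trait that drives the Invoke dispatcher below: it recovers the return type, the argument count, and each argument's type. For a hypothetical emitter of the shape Id EmitFoo(EmitContext&, Id, Id), it yields:

    // Illustrative instantiation; EmitFoo's signature is an assumption.
    using Traits = FuncTraits<Id (*)(EmitContext&, Id, Id)>;
    static_assert(Traits::NUM_ARGS == 3);
    static_assert(std::is_same_v<Traits::ReturnType, Id>);
    static_assert(std::is_same_v<Traits::ArgType<1>, Id>); // ArgType<0> is EmitContext&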
| 32 | template <auto func, typename... Args> | ||
| 33 | void SetDefinition(EmitContext& ctx, IR::Inst* inst, Args... args) { | ||
| 34 | inst->SetDefinition<Id>(func(ctx, std::forward<Args>(args)...)); | ||
| 35 | } | ||
| 36 | |||
| 37 | template <typename ArgType> | ||
| 38 | ArgType Arg(EmitContext& ctx, const IR::Value& arg) { | ||
| 39 | if constexpr (std::is_same_v<ArgType, Id>) { | ||
| 40 | return ctx.Def(arg); | ||
| 41 | } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) { | ||
| 42 | return arg; | ||
| 43 | } else if constexpr (std::is_same_v<ArgType, u32>) { | ||
| 44 | return arg.U32(); | ||
| 45 | } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) { | ||
| 46 | return arg.Attribute(); | ||
| 47 | } else if constexpr (std::is_same_v<ArgType, IR::Patch>) { | ||
| 48 | return arg.Patch(); | ||
| 49 | } else if constexpr (std::is_same_v<ArgType, IR::Reg>) { | ||
| 50 | return arg.Reg(); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | template <auto func, bool is_first_arg_inst, size_t... I> | ||
| 55 | void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) { | ||
| 56 | using Traits = FuncTraits<decltype(func)>; | ||
| 57 | if constexpr (std::is_same_v<typename Traits::ReturnType, Id>) { | ||
| 58 | if constexpr (is_first_arg_inst) { | ||
| 59 | SetDefinition<func>( | ||
| 60 | ctx, inst, inst, | ||
| 61 | Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 62 | } else { | ||
| 63 | SetDefinition<func>( | ||
| 64 | ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 65 | } | ||
| 66 | } else { | ||
| 67 | if constexpr (is_first_arg_inst) { | ||
| 68 | func(ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...); | ||
| 69 | } else { | ||
| 70 | func(ctx, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | template <auto func> | ||
| 76 | void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||
| 77 | using Traits = FuncTraits<decltype(func)>; | ||
| 78 | static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments"); | ||
| 79 | if constexpr (Traits::NUM_ARGS == 1) { | ||
| 80 | Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{}); | ||
| 81 | } else { | ||
| 82 | using FirstArgType = typename Traits::template ArgType<1>; | ||
| 83 | static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst*>; | ||
| 84 | using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>; | ||
| 85 | Invoke<func, is_first_arg_inst>(ctx, inst, Indices{}); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||
| 90 | switch (inst->GetOpcode()) { | ||
| 91 | #define OPCODE(name, result_type, ...) \ | ||
| 92 | case IR::Opcode::name: \ | ||
| 93 | return Invoke<&Emit##name>(ctx, inst); | ||
| 94 | #include "shader_recompiler/frontend/ir/opcodes.inc" | ||
| 95 | #undef OPCODE | ||
| 96 | } | ||
| 97 | throw LogicError("Invalid opcode {}", inst->GetOpcode()); | ||
| 98 | } | ||
| 99 | |||
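Each OPCODE entry of opcodes.inc becomes one case of this switch. For a representative opcode such as IAdd32, the expansion is roughly:

    //   case IR::Opcode::IAdd32:
    //       return Invoke<&EmitIAdd32>(ctx, inst);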
| 100 | Id TypeId(const EmitContext& ctx, IR::Type type) { | ||
| 101 | switch (type) { | ||
| 102 | case IR::Type::U1: | ||
| 103 | return ctx.U1; | ||
| 104 | case IR::Type::U32: | ||
| 105 | return ctx.U32[1]; | ||
| 106 | default: | ||
| 107 | throw NotImplementedException("Phi node type {}", type); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | void Traverse(EmitContext& ctx, IR::Program& program) { | ||
| 112 | IR::Block* current_block{}; | ||
| 113 | for (const IR::AbstractSyntaxNode& node : program.syntax_list) { | ||
| 114 | switch (node.type) { | ||
| 115 | case IR::AbstractSyntaxNode::Type::Block: { | ||
| 116 | const Id label{node.data.block->Definition<Id>()}; | ||
| 117 | if (current_block) { | ||
| 118 | ctx.OpBranch(label); | ||
| 119 | } | ||
| 120 | current_block = node.data.block; | ||
| 121 | ctx.AddLabel(label); | ||
| 122 | for (IR::Inst& inst : node.data.block->Instructions()) { | ||
| 123 | EmitInst(ctx, &inst); | ||
| 124 | } | ||
| 125 | break; | ||
| 126 | } | ||
| 127 | case IR::AbstractSyntaxNode::Type::If: { | ||
| 128 | const Id if_label{node.data.if_node.body->Definition<Id>()}; | ||
| 129 | const Id endif_label{node.data.if_node.merge->Definition<Id>()}; | ||
| 130 | ctx.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone); | ||
| 131 | ctx.OpBranchConditional(ctx.Def(node.data.if_node.cond), if_label, endif_label); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case IR::AbstractSyntaxNode::Type::Loop: { | ||
| 135 | const Id body_label{node.data.loop.body->Definition<Id>()}; | ||
| 136 | const Id continue_label{node.data.loop.continue_block->Definition<Id>()}; | ||
| 137 | const Id endloop_label{node.data.loop.merge->Definition<Id>()}; | ||
| 138 | |||
| 139 | ctx.OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); | ||
| 140 | ctx.OpBranch(body_label); | ||
| 141 | break; | ||
| 142 | } | ||
| 143 | case IR::AbstractSyntaxNode::Type::Break: { | ||
| 144 | const Id break_label{node.data.break_node.merge->Definition<Id>()}; | ||
| 145 | const Id skip_label{node.data.break_node.skip->Definition<Id>()}; | ||
| 146 | ctx.OpBranchConditional(ctx.Def(node.data.break_node.cond), break_label, skip_label); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case IR::AbstractSyntaxNode::Type::EndIf: | ||
| 150 | if (current_block) { | ||
| 151 | ctx.OpBranch(node.data.end_if.merge->Definition<Id>()); | ||
| 152 | } | ||
| 153 | break; | ||
| 154 | case IR::AbstractSyntaxNode::Type::Repeat: { | ||
| 155 | Id cond{ctx.Def(node.data.repeat.cond)}; | ||
| 156 | if (!Settings::values.disable_shader_loop_safety_checks) { | ||
| 157 | const Id pointer_type{ctx.TypePointer(spv::StorageClass::Private, ctx.U32[1])}; | ||
| 158 | const Id safety_counter{ctx.AddGlobalVariable( | ||
| 159 | pointer_type, spv::StorageClass::Private, ctx.Const(0x2000u))}; | ||
| 160 | if (ctx.profile.supported_spirv >= 0x00010400) { | ||
| 161 | ctx.interfaces.push_back(safety_counter); | ||
| 162 | } | ||
| 163 | const Id old_counter{ctx.OpLoad(ctx.U32[1], safety_counter)}; | ||
| 164 | const Id new_counter{ctx.OpISub(ctx.U32[1], old_counter, ctx.Const(1u))}; | ||
| 165 | ctx.OpStore(safety_counter, new_counter); | ||
| 166 | |||
| 167 | const Id safety_cond{ | ||
| 168 | ctx.OpSGreaterThanEqual(ctx.U1, new_counter, ctx.u32_zero_value)}; | ||
| 169 | cond = ctx.OpLogicalAnd(ctx.U1, cond, safety_cond); | ||
| 170 | } | ||
| 171 | const Id loop_header_label{node.data.repeat.loop_header->Definition<Id>()}; | ||
| 172 | const Id merge_label{node.data.repeat.merge->Definition<Id>()}; | ||
| 173 | ctx.OpBranchConditional(cond, loop_header_label, merge_label); | ||
| 174 | break; | ||
| 175 | } | ||
| 176 | case IR::AbstractSyntaxNode::Type::Return: | ||
| 177 | ctx.OpReturn(); | ||
| 178 | break; | ||
| 179 | case IR::AbstractSyntaxNode::Type::Unreachable: | ||
| 180 | ctx.OpUnreachable(); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | if (node.type != IR::AbstractSyntaxNode::Type::Block) { | ||
| 184 | current_block = nullptr; | ||
| 185 | } | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
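The Repeat case above guards against runaway guest loops: unless disabled in settings, each emitted loop gets a private counter initialized to 0x2000 that is decremented every iteration and AND-ed into the branch condition. A host-side analogue, as a sketch:

    #include <cstdint>

    int32_t safety_counter = 0x2000; // one private counter per emitted loop

    // The loop repeats only while the guest condition holds AND the counter
    // has not gone negative (signed compare, as in OpSGreaterThanEqual).
    bool RepeatIterationAllowed(bool guest_cond) {
        --safety_counter;                          // OpISub by 1, then OpStore
        return guest_cond && safety_counter >= 0;
    }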
| 189 | Id DefineMain(EmitContext& ctx, IR::Program& program) { | ||
| 190 | const Id void_function{ctx.TypeFunction(ctx.void_id)}; | ||
| 191 | const Id main{ctx.OpFunction(ctx.void_id, spv::FunctionControlMask::MaskNone, void_function)}; | ||
| 192 | for (IR::Block* const block : program.blocks) { | ||
| 193 | block->SetDefinition(ctx.OpLabel()); | ||
| 194 | } | ||
| 195 | Traverse(ctx, program); | ||
| 196 | ctx.OpFunctionEnd(); | ||
| 197 | return main; | ||
| 198 | } | ||
| 199 | |||
| 200 | spv::ExecutionMode ExecutionMode(TessPrimitive primitive) { | ||
| 201 | switch (primitive) { | ||
| 202 | case TessPrimitive::Isolines: | ||
| 203 | return spv::ExecutionMode::Isolines; | ||
| 204 | case TessPrimitive::Triangles: | ||
| 205 | return spv::ExecutionMode::Triangles; | ||
| 206 | case TessPrimitive::Quads: | ||
| 207 | return spv::ExecutionMode::Quads; | ||
| 208 | } | ||
| 209 | throw InvalidArgument("Tessellation primitive {}", primitive); | ||
| 210 | } | ||
| 211 | |||
| 212 | spv::ExecutionMode ExecutionMode(TessSpacing spacing) { | ||
| 213 | switch (spacing) { | ||
| 214 | case TessSpacing::Equal: | ||
| 215 | return spv::ExecutionMode::SpacingEqual; | ||
| 216 | case TessSpacing::FractionalOdd: | ||
| 217 | return spv::ExecutionMode::SpacingFractionalOdd; | ||
| 218 | case TessSpacing::FractionalEven: | ||
| 219 | return spv::ExecutionMode::SpacingFractionalEven; | ||
| 220 | } | ||
| 221 | throw InvalidArgument("Tessellation spacing {}", spacing); | ||
| 222 | } | ||
| 223 | |||
| 224 | void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { | ||
| 225 | const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size()); | ||
| 226 | spv::ExecutionModel execution_model{}; | ||
| 227 | switch (program.stage) { | ||
| 228 | case Stage::Compute: { | ||
| 229 | const std::array<u32, 3> workgroup_size{program.workgroup_size}; | ||
| 230 | execution_model = spv::ExecutionModel::GLCompute; | ||
| 231 | ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0], | ||
| 232 | workgroup_size[1], workgroup_size[2]); | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | case Stage::VertexB: | ||
| 236 | execution_model = spv::ExecutionModel::Vertex; | ||
| 237 | break; | ||
| 238 | case Stage::TessellationControl: | ||
| 239 | execution_model = spv::ExecutionModel::TessellationControl; | ||
| 240 | ctx.AddCapability(spv::Capability::Tessellation); | ||
| 241 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.invocations); | ||
| 242 | break; | ||
| 243 | case Stage::TessellationEval: | ||
| 244 | execution_model = spv::ExecutionModel::TessellationEvaluation; | ||
| 245 | ctx.AddCapability(spv::Capability::Tessellation); | ||
| 246 | ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_primitive)); | ||
| 247 | ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.tess_spacing)); | ||
| 248 | ctx.AddExecutionMode(main, ctx.runtime_info.tess_clockwise | ||
| 249 | ? spv::ExecutionMode::VertexOrderCw | ||
| 250 | : spv::ExecutionMode::VertexOrderCcw); | ||
| 251 | break; | ||
| 252 | case Stage::Geometry: | ||
| 253 | execution_model = spv::ExecutionModel::Geometry; | ||
| 254 | ctx.AddCapability(spv::Capability::Geometry); | ||
| 255 | ctx.AddCapability(spv::Capability::GeometryStreams); | ||
| 256 | switch (ctx.runtime_info.input_topology) { | ||
| 257 | case InputTopology::Points: | ||
| 258 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); | ||
| 259 | break; | ||
| 260 | case InputTopology::Lines: | ||
| 261 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); | ||
| 262 | break; | ||
| 263 | case InputTopology::LinesAdjacency: | ||
| 264 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); | ||
| 265 | break; | ||
| 266 | case InputTopology::Triangles: | ||
| 267 | ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); | ||
| 268 | break; | ||
| 269 | case InputTopology::TrianglesAdjacency: | ||
| 270 | ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); | ||
| 271 | break; | ||
| 272 | } | ||
| 273 | switch (program.output_topology) { | ||
| 274 | case OutputTopology::PointList: | ||
| 275 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); | ||
| 276 | break; | ||
| 277 | case OutputTopology::LineStrip: | ||
| 278 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); | ||
| 279 | break; | ||
| 280 | case OutputTopology::TriangleStrip: | ||
| 281 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); | ||
| 282 | break; | ||
| 283 | } | ||
| 284 | if (program.info.stores[IR::Attribute::PointSize]) { | ||
| 285 | ctx.AddCapability(spv::Capability::GeometryPointSize); | ||
| 286 | } | ||
| 287 | ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); | ||
| 288 | ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); | ||
| 289 | if (program.is_geometry_passthrough) { | ||
| 290 | if (ctx.profile.support_geometry_shader_passthrough) { | ||
| 291 | ctx.AddExtension("SPV_NV_geometry_shader_passthrough"); | ||
| 292 | ctx.AddCapability(spv::Capability::GeometryShaderPassthroughNV); | ||
| 293 | } else { | ||
| 294 | LOG_WARNING(Shader_SPIRV, "Geometry shader passthrough used without host support"); | ||
| 295 | } | ||
| 296 | } | ||
| 297 | break; | ||
| 298 | case Stage::Fragment: | ||
| 299 | execution_model = spv::ExecutionModel::Fragment; | ||
| 300 | if (ctx.profile.lower_left_origin_mode) { | ||
| 301 | ctx.AddExecutionMode(main, spv::ExecutionMode::OriginLowerLeft); | ||
| 302 | } else { | ||
| 303 | ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | ||
| 304 | } | ||
| 305 | if (program.info.stores_frag_depth) { | ||
| 306 | ctx.AddExecutionMode(main, spv::ExecutionMode::DepthReplacing); | ||
| 307 | } | ||
| 308 | if (ctx.runtime_info.force_early_z) { | ||
| 309 | ctx.AddExecutionMode(main, spv::ExecutionMode::EarlyFragmentTests); | ||
| 310 | } | ||
| 311 | break; | ||
| 312 | default: | ||
| 313 | throw NotImplementedException("Stage {}", program.stage); | ||
| 314 | } | ||
| 315 | ctx.AddEntryPoint(execution_model, main, "main", interfaces); | ||
| 316 | } | ||
| 317 | |||
| 318 | void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, | ||
| 319 | Id main_func) { | ||
| 320 | const Info& info{program.info}; | ||
| 321 | if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { | ||
| 322 | LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader"); | ||
| 323 | } else if (info.uses_fp32_denorms_flush) { | ||
| 324 | if (profile.support_fp32_denorm_flush) { | ||
| 325 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 326 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); | ||
| 327 | } else { | ||
| 328 | // Drivers will most likely flush denorms by default, no need to warn | ||
| 329 | } | ||
| 330 | } else if (info.uses_fp32_denorms_preserve) { | ||
| 331 | if (profile.support_fp32_denorm_preserve) { | ||
| 332 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 333 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); | ||
| 334 | } else { | ||
| 335 | LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support"); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | if (!profile.support_separate_denorm_behavior || profile.has_broken_fp16_float_controls) { | ||
| 339 | // No separate denorm behavior | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { | ||
| 343 | LOG_DEBUG(Shader_SPIRV, "Fp16 denorm flush and preserve on the same shader"); | ||
| 344 | } else if (info.uses_fp16_denorms_flush) { | ||
| 345 | if (profile.support_fp16_denorm_flush) { | ||
| 346 | ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||
| 347 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 16U); | ||
| 348 | } else { | ||
| 349 | // Same as fp32, no need to warn as most drivers will flush by default | ||
| 350 | } | ||
| 351 | } else if (info.uses_fp16_denorms_preserve) { | ||
| 352 | if (profile.support_fp16_denorm_preserve) { | ||
| 353 | ctx.AddCapability(spv::Capability::DenormPreserve); | ||
| 354 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||
| 355 | } else { | ||
| 356 | LOG_DEBUG(Shader_SPIRV, "Fp16 denorm preserve used in shader without host support"); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | } | ||
| 360 | |||
| 361 | void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& program, | ||
| 362 | EmitContext& ctx, Id main_func) { | ||
| 363 | if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) { | ||
| 364 | return; | ||
| 365 | } | ||
| 366 | if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { | ||
| 367 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 368 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); | ||
| 369 | } | ||
| 370 | if (profile.support_fp32_signed_zero_nan_preserve) { | ||
| 371 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 372 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); | ||
| 373 | } | ||
| 374 | if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) { | ||
| 375 | ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||
| 376 | ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 64U); | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 380 | void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ctx) { | ||
| 381 | if (info.uses_sampled_1d) { | ||
| 382 | ctx.AddCapability(spv::Capability::Sampled1D); | ||
| 383 | } | ||
| 384 | if (info.uses_sparse_residency) { | ||
| 385 | ctx.AddCapability(spv::Capability::SparseResidency); | ||
| 386 | } | ||
| 387 | if (info.uses_demote_to_helper_invocation && profile.support_demote_to_helper_invocation) { | ||
| 388 | ctx.AddExtension("SPV_EXT_demote_to_helper_invocation"); | ||
| 389 | ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT); | ||
| 390 | } | ||
| 391 | if (info.stores[IR::Attribute::ViewportIndex]) { | ||
| 392 | ctx.AddCapability(spv::Capability::MultiViewport); | ||
| 393 | } | ||
| 394 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 395 | ctx.AddExtension("SPV_NV_viewport_array2"); | ||
| 396 | ctx.AddCapability(spv::Capability::ShaderViewportMaskNV); | ||
| 397 | } | ||
| 398 | if (info.stores[IR::Attribute::Layer] || info.stores[IR::Attribute::ViewportIndex]) { | ||
| 399 | if (profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { | ||
| 400 | ctx.AddExtension("SPV_EXT_shader_viewport_index_layer"); | ||
| 401 | ctx.AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); | ||
| 402 | } | ||
| 403 | } | ||
| 404 | if (!profile.support_vertex_instance_id && | ||
| 405 | (info.loads[IR::Attribute::InstanceId] || info.loads[IR::Attribute::VertexId])) { | ||
| 406 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | ||
| 407 | ctx.AddCapability(spv::Capability::DrawParameters); | ||
| 408 | } | ||
| 409 | if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id || | ||
| 410 | info.uses_subgroup_shuffles) && | ||
| 411 | profile.support_vote) { | ||
| 412 | ctx.AddExtension("SPV_KHR_shader_ballot"); | ||
| 413 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | ||
| 414 | if (!profile.warp_size_potentially_larger_than_guest) { | ||
| 415 | // Vote ops are only used when not taking the long path | ||
| 416 | ctx.AddExtension("SPV_KHR_subgroup_vote"); | ||
| 417 | ctx.AddCapability(spv::Capability::SubgroupVoteKHR); | ||
| 418 | } | ||
| 419 | } | ||
| 420 | if (info.uses_int64_bit_atomics && profile.support_int64_atomics) { | ||
| 421 | ctx.AddCapability(spv::Capability::Int64Atomics); | ||
| 422 | } | ||
| 423 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||
| 424 | ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); | ||
| 425 | } | ||
| 426 | if (info.uses_typeless_image_writes) { | ||
| 427 | ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat); | ||
| 428 | } | ||
| 429 | if (info.uses_image_buffers) { | ||
| 430 | ctx.AddCapability(spv::Capability::ImageBuffer); | ||
| 431 | } | ||
| 432 | if (info.uses_sample_id) { | ||
| 433 | ctx.AddCapability(spv::Capability::SampleRateShading); | ||
| 434 | } | ||
| 435 | if (!ctx.runtime_info.xfb_varyings.empty()) { | ||
| 436 | ctx.AddCapability(spv::Capability::TransformFeedback); | ||
| 437 | } | ||
| 438 | if (info.uses_derivatives) { | ||
| 439 | ctx.AddCapability(spv::Capability::DerivativeControl); | ||
| 440 | } | ||
| 441 | // TODO: Track this usage | ||
| 442 | ctx.AddCapability(spv::Capability::ImageGatherExtended); | ||
| 443 | ctx.AddCapability(spv::Capability::ImageQuery); | ||
| 444 | ctx.AddCapability(spv::Capability::SampledBuffer); | ||
| 445 | } | ||
| 446 | |||
| 447 | void PatchPhiNodes(IR::Program& program, EmitContext& ctx) { | ||
| 448 | auto inst{program.blocks.front()->begin()}; | ||
| 449 | size_t block_index{0}; | ||
| 450 | ctx.PatchDeferredPhi([&](size_t phi_arg) { | ||
| 451 | if (phi_arg == 0) { | ||
| 452 | ++inst; | ||
| 453 | if (inst == program.blocks[block_index]->end() || | ||
| 454 | inst->GetOpcode() != IR::Opcode::Phi) { | ||
| 455 | do { | ||
| 456 | ++block_index; | ||
| 457 | inst = program.blocks[block_index]->begin(); | ||
| 458 | } while (inst->GetOpcode() != IR::Opcode::Phi); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | return ctx.Def(inst->Arg(phi_arg)); | ||
| 462 | }); | ||
| 463 | } | ||
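PatchPhiNodes leans on two invariants: the entry block can hold no phis (it has no predecessors, so starting the cursor at its first instruction is safe), and Sirit replays deferred phi operands in emission order, marking the start of each phi with phi_arg == 0. The standalone sketch below models that cursor advance with hypothetical string "instructions" standing in for the IR; it is an illustration of the traversal, not the emulator's API.

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        // Leading "phi:*" entries model IR phis; the entry block has none.
        const std::vector<std::vector<std::string>> blocks{
            {"add", "ret"},            // entry block: no phis
            {"phi:a", "phi:b", "mul"}, // block 1: two phis
            {"phi:c", "ret"},          // block 2: one phi
        };
        const auto is_phi = [](const std::string& s) { return s.rfind("phi:", 0) == 0; };
        std::size_t block_index = 0;
        auto inst = blocks.front().begin();
        // Replay three phis with two operands each, as PatchDeferredPhi would.
        for (int phi = 0; phi < 3; ++phi) {
            for (std::size_t phi_arg = 0; phi_arg < 2; ++phi_arg) {
                if (phi_arg == 0) { // operand 0 signals "advance to the next phi"
                    ++inst;
                    if (inst == blocks[block_index].end() || !is_phi(*inst)) {
                        do {
                            ++block_index;
                            inst = blocks[block_index].begin();
                        } while (!is_phi(*inst));
                    }
                }
                std::cout << *inst << " operand " << phi_arg << '\n';
            }
        }
    }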
| 464 | } // Anonymous namespace | ||
| 465 | |||
| 466 | std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 467 | IR::Program& program, Bindings& bindings) { | ||
| 468 | EmitContext ctx{profile, runtime_info, program, bindings}; | ||
| 469 | const Id main{DefineMain(ctx, program)}; | ||
| 470 | DefineEntryPoint(program, ctx, main); | ||
| 471 | if (profile.support_float_controls) { | ||
| 472 | ctx.AddExtension("SPV_KHR_float_controls"); | ||
| 473 | SetupDenormControl(profile, program, ctx, main); | ||
| 474 | SetupSignedNanCapabilities(profile, program, ctx, main); | ||
| 475 | } | ||
| 476 | SetupCapabilities(profile, program.info, ctx); | ||
| 477 | PatchPhiNodes(program, ctx); | ||
| 478 | return ctx.Assemble(); | ||
| 479 | } | ||
| 480 | |||
| 481 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst) { | ||
| 482 | const size_t num_args{inst->NumArgs()}; | ||
| 483 | boost::container::small_vector<Id, 32> blocks; | ||
| 484 | blocks.reserve(num_args); | ||
| 485 | for (size_t index = 0; index < num_args; ++index) { | ||
| 486 | blocks.push_back(inst->PhiBlock(index)->Definition<Id>()); | ||
| 487 | } | ||
| 488 | // The type of a phi instruction is stored in its flags | ||
| 489 | const Id result_type{TypeId(ctx, inst->Flags<IR::Type>())}; | ||
| 490 | return ctx.DeferredOpPhi(result_type, std::span(blocks.data(), blocks.size())); | ||
| 491 | } | ||
| 492 | |||
| 493 | void EmitVoid(EmitContext&) {} | ||
| 494 | |||
| 495 | Id EmitIdentity(EmitContext& ctx, const IR::Value& value) { | ||
| 496 | const Id id{ctx.Def(value)}; | ||
| 497 | if (!Sirit::ValidId(id)) { | ||
| 498 | throw NotImplementedException("Forward identity declaration"); | ||
| 499 | } | ||
| 500 | return id; | ||
| 501 | } | ||
| 502 | |||
| 503 | Id EmitConditionRef(EmitContext& ctx, const IR::Value& value) { | ||
| 504 | const Id id{ctx.Def(value)}; | ||
| 505 | if (!Sirit::ValidId(id)) { | ||
| 506 | throw NotImplementedException("Forward identity declaration"); | ||
| 507 | } | ||
| 508 | return id; | ||
| 509 | } | ||
| 510 | |||
| 511 | void EmitReference(EmitContext&) {} | ||
| 512 | |||
| 513 | void EmitPhiMove(EmitContext&) { | ||
| 514 | throw LogicError("Unreachable instruction"); | ||
| 515 | } | ||
| 516 | |||
| 517 | void EmitGetZeroFromOp(EmitContext&) { | ||
| 518 | throw LogicError("Unreachable instruction"); | ||
| 519 | } | ||
| 520 | |||
| 521 | void EmitGetSignFromOp(EmitContext&) { | ||
| 522 | throw LogicError("Unreachable instruction"); | ||
| 523 | } | ||
| 524 | |||
| 525 | void EmitGetCarryFromOp(EmitContext&) { | ||
| 526 | throw LogicError("Unreachable instruction"); | ||
| 527 | } | ||
| 528 | |||
| 529 | void EmitGetOverflowFromOp(EmitContext&) { | ||
| 530 | throw LogicError("Unreachable instruction"); | ||
| 531 | } | ||
| 532 | |||
| 533 | void EmitGetSparseFromOp(EmitContext&) { | ||
| 534 | throw LogicError("Unreachable instruction"); | ||
| 535 | } | ||
| 536 | |||
| 537 | void EmitGetInBoundsFromOp(EmitContext&) { | ||
| 538 | throw LogicError("Unreachable instruction"); | ||
| 539 | } | ||
| 540 | |||
| 541 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h new file mode 100644 index 000000000..db0c935fe --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <sirit/sirit.h> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/backend/bindings.h" | ||
| 13 | #include "shader_recompiler/backend/spirv/emit_context.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 15 | #include "shader_recompiler/profile.h" | ||
| 16 | |||
| 17 | namespace Shader::Backend::SPIRV { | ||
| 18 | |||
| 19 | [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info, | ||
| 20 | IR::Program& program, Bindings& bindings); | ||
| 21 | |||
| 22 | [[nodiscard]] inline std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program) { | ||
| 23 | Bindings bindings; | ||
| 24 | return EmitSPIRV(profile, {}, program, bindings); | ||
| 25 | } | ||
| 26 | |||
| 27 | } // namespace Shader::Backend::SPIRV | ||
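The convenience overload above default-constructs the Bindings and passes an empty RuntimeInfo, which is enough for one-shot compilation. A minimal caller might look like this sketch; CompileToSpirv is a hypothetical wrapper, and producing the IR::Program (the Maxwell frontend's job) is out of scope here.

    #include "shader_recompiler/backend/spirv/emit_spirv.h"

    // Hypothetical helper: the Profile describes host capabilities and the
    // IR::Program comes from the frontend; both are produced elsewhere.
    std::vector<u32> CompileToSpirv(const Shader::Profile& profile,
                                    Shader::IR::Program& program) {
        return Shader::Backend::SPIRV::EmitSPIRV(profile, program);
    }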
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..9af8bb9e1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | |||
| @@ -0,0 +1,448 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id SharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { | ||
| 11 | const Id shift_id{ctx.Const(2U)}; | ||
| 12 | Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 13 | if (index_offset > 0) { | ||
| 14 | index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); | ||
| 15 | } | ||
| 16 | return ctx.profile.support_explicit_workgroup_layout | ||
| 17 | ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) | ||
| 18 | : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); | ||
| 19 | } | ||
| 20 | |||
| 21 | Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { | ||
| 22 | if (offset.IsImmediate()) { | ||
| 23 | const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; | ||
| 24 | return ctx.Const(imm_offset); | ||
| 25 | } | ||
| 26 | const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 27 | const Id index{ctx.Def(offset)}; | ||
| 28 | if (shift == 0) { | ||
| 29 | return index; | ||
| 30 | } | ||
| 31 | const Id shift_id{ctx.Const(shift)}; | ||
| 32 | return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||
| 33 | } | ||
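StorageIndex (like SharedPointer above it) turns a byte offset into an element index by shifting right by log2 of the element size, which std::countr_zero yields for any power-of-two size. A minimal CPU-side sketch of the same arithmetic:

    #include <bit>
    #include <cstddef>
    #include <cstdint>

    // Same arithmetic as the non-immediate path: divide the byte offset by the
    // element size, expressed as a right shift since the size is a power of two.
    constexpr std::uint32_t ElementIndex(std::uint32_t byte_offset, std::size_t element_size) {
        return byte_offset >> std::countr_zero(element_size);
    }

    static_assert(ElementIndex(24, sizeof(std::uint32_t)) == 6); // 24 >> 2
    static_assert(ElementIndex(24, sizeof(std::uint64_t)) == 3); // 24 >> 3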
| 34 | |||
| 35 | Id StoragePointer(EmitContext& ctx, const StorageTypeDefinition& type_def, | ||
| 36 | Id StorageDefinitions::*member_ptr, const IR::Value& binding, | ||
| 37 | const IR::Value& offset, size_t element_size) { | ||
| 38 | if (!binding.IsImmediate()) { | ||
| 39 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 40 | } | ||
| 41 | const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; | ||
| 42 | const Id index{StorageIndex(ctx, offset, element_size)}; | ||
| 43 | return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { | ||
| 47 | const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; | ||
| 48 | const Id semantics{ctx.u32_zero_value}; | ||
| 49 | return {scope, semantics}; | ||
| 50 | } | ||
| 51 | |||
| 52 | Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value, | ||
| 53 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 54 | const Id pointer{SharedPointer(ctx, offset)}; | ||
| 55 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 56 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 57 | } | ||
| 58 | |||
| 59 | Id StorageAtomicU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 60 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 61 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32, &StorageDefinitions::U32, binding, | ||
| 62 | offset, sizeof(u32))}; | ||
| 63 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 64 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id StorageAtomicU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 68 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id), | ||
| 69 | Id (Sirit::Module::*non_atomic_func)(Id, Id, Id)) { | ||
| 70 | if (ctx.profile.support_int64_atomics) { | ||
| 71 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, | ||
| 72 | binding, offset, sizeof(u64))}; | ||
| 73 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 74 | return (ctx.*atomic_func)(ctx.U64, pointer, scope, semantics, value); | ||
| 75 | } | ||
| 76 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 77 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, | ||
| 78 | binding, offset, sizeof(u32[2]))}; | ||
| 79 | const Id original_value{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; | ||
| 80 | const Id result{(ctx.*non_atomic_func)(ctx.U64, value, original_value)}; | ||
| 81 | ctx.OpStore(pointer, ctx.OpBitcast(ctx.U32[2], result)); | ||
| 82 | return original_value; | ||
| 83 | } | ||
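When the host lacks 64-bit atomics, the fallback above aliases the SSBO as a uvec2 and performs a plain read-modify-write. In CPU terms it behaves like the sketch below; nothing in it is atomic, so racing invocations can lose updates, which is why the emitter logs an error instead of treating this as a correct lowering.

    #include <cstdint>
    #include <cstring>

    // CPU-level shape of the fallback: load two words as one u64, apply the
    // non-atomic op, store the result, and return the previous value.
    std::uint64_t NonAtomicIAdd64(std::uint32_t words[2], std::uint64_t value) {
        std::uint64_t original;
        std::memcpy(&original, words, sizeof(original)); // OpLoad + OpBitcast
        const std::uint64_t result = original + value;   // non_atomic_func
        std::memcpy(words, &result, sizeof(result));     // OpBitcast + OpStore
        return original;                                 // atomics yield the old value
    }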
| 84 | } // Anonymous namespace | ||
| 85 | |||
| 86 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id offset, Id value) { | ||
| 87 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicIAdd); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value) { | ||
| 91 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMin); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id offset, Id value) { | ||
| 95 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMin); | ||
| 96 | } | ||
| 97 | |||
| 98 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id offset, Id value) { | ||
| 99 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicSMax); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id offset, Id value) { | ||
| 103 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicUMax); | ||
| 104 | } | ||
| 105 | |||
| 106 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id offset, Id value) { | ||
| 107 | const Id shift_id{ctx.Const(2U)}; | ||
| 108 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 109 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value); | ||
| 110 | } | ||
| 111 | |||
| 112 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id offset, Id value) { | ||
| 113 | const Id shift_id{ctx.Const(2U)}; | ||
| 114 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 115 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value); | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value) { | ||
| 119 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicAnd); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value) { | ||
| 123 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicOr); | ||
| 124 | } | ||
| 125 | |||
| 126 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value) { | ||
| 127 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicXor); | ||
| 128 | } | ||
| 129 | |||
| 130 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id offset, Id value) { | ||
| 131 | return SharedAtomicU32(ctx, offset, value, &Sirit::Module::OpAtomicExchange); | ||
| 132 | } | ||
| 133 | |||
| 134 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id offset, Id value) { | ||
| 135 | if (ctx.profile.support_int64_atomics && ctx.profile.support_explicit_workgroup_layout) { | ||
| 136 | const Id shift_id{ctx.Const(3U)}; | ||
| 137 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 138 | const Id pointer{ | ||
| 139 | ctx.OpAccessChain(ctx.shared_u64, ctx.shared_memory_u64, ctx.u32_zero_value, index)}; | ||
| 140 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 141 | return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); | ||
| 142 | } | ||
| 143 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 144 | const Id pointer_1{SharedPointer(ctx, offset, 0)}; | ||
| 145 | const Id pointer_2{SharedPointer(ctx, offset, 1)}; | ||
| 146 | const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; | ||
| 147 | const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; | ||
| 148 | const Id new_vector{ctx.OpBitcast(ctx.U32[2], value)}; | ||
| 149 | ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 0U)); | ||
| 150 | ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], new_vector, 1U)); | ||
| 151 | return ctx.OpBitcast(ctx.U64, ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 155 | Id value) { | ||
| 156 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 160 | Id value) { | ||
| 161 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin); | ||
| 162 | } | ||
| 163 | |||
| 164 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 165 | Id value) { | ||
| 166 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin); | ||
| 167 | } | ||
| 168 | |||
| 169 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 170 | Id value) { | ||
| 171 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax); | ||
| 172 | } | ||
| 173 | |||
| 174 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 175 | Id value) { | ||
| 176 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 180 | Id value) { | ||
| 181 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 182 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 183 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); | ||
| 184 | } | ||
| 185 | |||
| 186 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 187 | Id value) { | ||
| 188 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 189 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 190 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); | ||
| 191 | } | ||
| 192 | |||
| 193 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 194 | Id value) { | ||
| 195 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 199 | Id value) { | ||
| 200 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr); | ||
| 201 | } | ||
| 202 | |||
| 203 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 204 | Id value) { | ||
| 205 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor); | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 209 | Id value) { | ||
| 210 | return StorageAtomicU32(ctx, binding, offset, value, &Sirit::Module::OpAtomicExchange); | ||
| 211 | } | ||
| 212 | |||
| 213 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 214 | Id value) { | ||
| 215 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicIAdd, | ||
| 216 | &Sirit::Module::OpIAdd); | ||
| 217 | } | ||
| 218 | |||
| 219 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 220 | Id value) { | ||
| 221 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMin, | ||
| 222 | &Sirit::Module::OpSMin); | ||
| 223 | } | ||
| 224 | |||
| 225 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 226 | Id value) { | ||
| 227 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMin, | ||
| 228 | &Sirit::Module::OpUMin); | ||
| 229 | } | ||
| 230 | |||
| 231 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 232 | Id value) { | ||
| 233 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicSMax, | ||
| 234 | &Sirit::Module::OpSMax); | ||
| 235 | } | ||
| 236 | |||
| 237 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 238 | Id value) { | ||
| 239 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicUMax, | ||
| 240 | &Sirit::Module::OpUMax); | ||
| 241 | } | ||
| 242 | |||
| 243 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 244 | Id value) { | ||
| 245 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicAnd, | ||
| 246 | &Sirit::Module::OpBitwiseAnd); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 250 | Id value) { | ||
| 251 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicOr, | ||
| 252 | &Sirit::Module::OpBitwiseOr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 256 | Id value) { | ||
| 257 | return StorageAtomicU64(ctx, binding, offset, value, &Sirit::Module::OpAtomicXor, | ||
| 258 | &Sirit::Module::OpBitwiseXor); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 262 | Id value) { | ||
| 263 | if (ctx.profile.support_int64_atomics) { | ||
| 264 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U64, &StorageDefinitions::U64, | ||
| 265 | binding, offset, sizeof(u64))}; | ||
| 266 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 267 | return ctx.OpAtomicExchange(ctx.U64, pointer, scope, semantics, value); | ||
| 268 | } | ||
| 269 | LOG_ERROR(Shader_SPIRV, "Int64 atomics not supported, fallback to non-atomic"); | ||
| 270 | const Id pointer{StoragePointer(ctx, ctx.storage_types.U32x2, &StorageDefinitions::U32x2, | ||
| 271 | binding, offset, sizeof(u32[2]))}; | ||
| 272 | const Id original{ctx.OpBitcast(ctx.U64, ctx.OpLoad(ctx.U32[2], pointer))}; | ||
| 273 | ctx.OpStore(pointer, value); | ||
| 274 | return original; | ||
| 275 | } | ||
| 276 | |||
| 277 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 278 | Id value) { | ||
| 279 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 280 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 281 | return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); | ||
| 282 | } | ||
| 283 | |||
| 284 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 285 | Id value) { | ||
| 286 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 287 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 288 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; | ||
| 289 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 290 | } | ||
| 291 | |||
| 292 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 293 | Id value) { | ||
| 294 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 295 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 296 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; | ||
| 297 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 298 | } | ||
| 299 | |||
| 300 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 301 | Id value) { | ||
| 302 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 303 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 304 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; | ||
| 305 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 306 | } | ||
| 307 | |||
| 308 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 309 | Id value) { | ||
| 310 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 311 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 312 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; | ||
| 313 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 314 | } | ||
| 315 | |||
| 316 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 317 | Id value) { | ||
| 318 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 319 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 320 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; | ||
| 321 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 322 | } | ||
| 323 | |||
| 324 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 325 | Id value) { | ||
| 326 | const Id ssbo{ctx.ssbos[binding.U32()].U32}; | ||
| 327 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 328 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; | ||
| 329 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 330 | } | ||
| 331 | |||
| 332 | Id EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 333 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 334 | } | ||
| 335 | |||
| 336 | Id EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 337 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 338 | } | ||
| 339 | |||
| 340 | Id EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 341 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 342 | } | ||
| 343 | |||
| 344 | Id EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 345 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 346 | } | ||
| 347 | |||
| 348 | Id EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 349 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 350 | } | ||
| 351 | |||
| 352 | Id EmitGlobalAtomicInc32(EmitContext&) { | ||
| 353 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 354 | } | ||
| 355 | |||
| 356 | Id EmitGlobalAtomicDec32(EmitContext&) { | ||
| 357 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 358 | } | ||
| 359 | |||
| 360 | Id EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 361 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 362 | } | ||
| 363 | |||
| 364 | Id EmitGlobalAtomicOr32(EmitContext&) { | ||
| 365 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 366 | } | ||
| 367 | |||
| 368 | Id EmitGlobalAtomicXor32(EmitContext&) { | ||
| 369 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 370 | } | ||
| 371 | |||
| 372 | Id EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 373 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 374 | } | ||
| 375 | |||
| 376 | Id EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 377 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 378 | } | ||
| 379 | |||
| 380 | Id EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 381 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 382 | } | ||
| 383 | |||
| 384 | Id EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 385 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 386 | } | ||
| 387 | |||
| 388 | Id EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 389 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 393 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 394 | } | ||
| 395 | |||
| 396 | Id EmitGlobalAtomicInc64(EmitContext&) { | ||
| 397 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 398 | } | ||
| 399 | |||
| 400 | Id EmitGlobalAtomicDec64(EmitContext&) { | ||
| 401 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 402 | } | ||
| 403 | |||
| 404 | Id EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 405 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 406 | } | ||
| 407 | |||
| 408 | Id EmitGlobalAtomicOr64(EmitContext&) { | ||
| 409 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 410 | } | ||
| 411 | |||
| 412 | Id EmitGlobalAtomicXor64(EmitContext&) { | ||
| 413 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 414 | } | ||
| 415 | |||
| 416 | Id EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 417 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 418 | } | ||
| 419 | |||
| 420 | Id EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 421 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 422 | } | ||
| 423 | |||
| 424 | Id EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 425 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 426 | } | ||
| 427 | |||
| 428 | Id EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 429 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 430 | } | ||
| 431 | |||
| 432 | Id EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 433 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 434 | } | ||
| 435 | |||
| 436 | Id EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 437 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 438 | } | ||
| 439 | |||
| 440 | Id EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 441 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 442 | } | ||
| 443 | |||
| 444 | Id EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 445 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 446 | } | ||
| 447 | |||
| 448 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp new file mode 100644 index 000000000..e0b52a001 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | void MemoryBarrier(EmitContext& ctx, spv::Scope scope) { | ||
| 12 | const auto semantics{ | ||
| 13 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | ||
| 14 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AtomicCounterMemory | | ||
| 15 | spv::MemorySemanticsMask::ImageMemory}; | ||
| 16 | ctx.OpMemoryBarrier(ctx.Const(static_cast<u32>(scope)), ctx.Const(static_cast<u32>(semantics))); | ||
| 17 | } | ||
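The semantics operand is a plain bit mask from the SPIR-V specification, OR-combined and handed to OpMemoryBarrier as a 32-bit constant. The stand-in enum below (values copied from the spec's MemorySemantics table) shows the mask the helper builds; it is a self-contained illustration, not the spv:: header.

    #include <cstdint>
    #include <type_traits>

    // Bit positions per the SPIR-V MemorySemantics spec (stand-in for
    // spv::MemorySemanticsMask, kept local so the sketch is self-contained).
    enum class SemanticsMask : std::uint32_t {
        AcquireRelease = 0x8,
        UniformMemory = 0x40,
        WorkgroupMemory = 0x100,
        AtomicCounterMemory = 0x400,
        ImageMemory = 0x800,
    };

    constexpr SemanticsMask operator|(SemanticsMask a, SemanticsMask b) {
        using U = std::underlying_type_t<SemanticsMask>;
        return static_cast<SemanticsMask>(static_cast<U>(a) | static_cast<U>(b));
    }

    // The full-barrier mask built by MemoryBarrier() above works out to 0xd48.
    constexpr auto kFullBarrier = SemanticsMask::AcquireRelease | SemanticsMask::UniformMemory |
                                  SemanticsMask::WorkgroupMemory |
                                  SemanticsMask::AtomicCounterMemory | SemanticsMask::ImageMemory;
    static_assert(static_cast<std::uint32_t>(kFullBarrier) == 0xd48);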
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | void EmitBarrier(EmitContext& ctx) { | ||
| 21 | const auto execution{spv::Scope::Workgroup}; | ||
| 22 | const auto memory{spv::Scope::Workgroup}; | ||
| 23 | const auto memory_semantics{spv::MemorySemanticsMask::AcquireRelease | | ||
| 24 | spv::MemorySemanticsMask::WorkgroupMemory}; | ||
| 25 | ctx.OpControlBarrier(ctx.Const(static_cast<u32>(execution)), | ||
| 26 | ctx.Const(static_cast<u32>(memory)), | ||
| 27 | ctx.Const(static_cast<u32>(memory_semantics))); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 31 | MemoryBarrier(ctx, spv::Scope::Workgroup); | ||
| 32 | } | ||
| 33 | |||
| 34 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 35 | MemoryBarrier(ctx, spv::Scope::Device); | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp new file mode 100644 index 000000000..bb11f4f4e --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | void EmitBitCastU16F16(EmitContext&) { | ||
| 11 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitBitCastU32F32(EmitContext& ctx, Id value) { | ||
| 15 | return ctx.OpBitcast(ctx.U32[1], value); | ||
| 16 | } | ||
| 17 | |||
| 18 | void EmitBitCastU64F64(EmitContext&) { | ||
| 19 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | void EmitBitCastF16U16(EmitContext&) { | ||
| 23 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitBitCastF32U32(EmitContext& ctx, Id value) { | ||
| 27 | return ctx.OpBitcast(ctx.F32[1], value); | ||
| 28 | } | ||
| 29 | |||
| 30 | void EmitBitCastF64U64(EmitContext&) { | ||
| 31 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 32 | } | ||
| 33 | |||
| 34 | Id EmitPackUint2x32(EmitContext& ctx, Id value) { | ||
| 35 | return ctx.OpBitcast(ctx.U64, value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitUnpackUint2x32(EmitContext& ctx, Id value) { | ||
| 39 | return ctx.OpBitcast(ctx.U32[2], value); | ||
| 40 | } | ||
| 41 | |||
| 42 | Id EmitPackFloat2x16(EmitContext& ctx, Id value) { | ||
| 43 | return ctx.OpBitcast(ctx.U32[1], value); | ||
| 44 | } | ||
| 45 | |||
| 46 | Id EmitUnpackFloat2x16(EmitContext& ctx, Id value) { | ||
| 47 | return ctx.OpBitcast(ctx.F16[2], value); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitPackHalf2x16(EmitContext& ctx, Id value) { | ||
| 51 | return ctx.OpPackHalf2x16(ctx.U32[1], value); | ||
| 52 | } | ||
| 53 | |||
| 54 | Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) { | ||
| 55 | return ctx.OpUnpackHalf2x16(ctx.F32[2], value); | ||
| 56 | } | ||
| 57 | |||
| 58 | Id EmitPackDouble2x32(EmitContext& ctx, Id value) { | ||
| 59 | return ctx.OpBitcast(ctx.F64[1], value); | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitUnpackDouble2x32(EmitContext& ctx, Id value) { | ||
| 63 | return ctx.OpBitcast(ctx.U32[2], value); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Backend::SPIRV | ||
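The Pack/Unpack pairs in this file are pure OpBitcasts; under SPIR-V's scalar-to-vector bitcast rule, component 0 of the uvec2 receives the low-order word of the u64. A CPU-side equivalent of the Uint2x32 pair:

    #include <cstdint>
    #include <utility>

    // Component 0 is the low word, matching OpBitcast's scalar->vector rule.
    constexpr std::pair<std::uint32_t, std::uint32_t> UnpackUint2x32(std::uint64_t v) {
        return {static_cast<std::uint32_t>(v), static_cast<std::uint32_t>(v >> 32)};
    }

    constexpr std::uint64_t PackUint2x32(std::uint32_t lo, std::uint32_t hi) {
        return static_cast<std::uint64_t>(lo) | (static_cast<std::uint64_t>(hi) << 32);
    }

    static_assert(PackUint2x32(0x89abcdefu, 0x01234567u) == 0x0123456789abcdefULL);
    static_assert(UnpackUint2x32(0x0123456789abcdefULL).first == 0x89abcdefu);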
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp new file mode 100644 index 000000000..10ff4ecab --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp | |||
| @@ -0,0 +1,155 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | |||
| 11 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 12 | return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); | ||
| 13 | } | ||
| 14 | |||
| 15 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 16 | return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3); | ||
| 17 | } | ||
| 18 | |||
| 19 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 20 | return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4); | ||
| 21 | } | ||
| 22 | |||
| 23 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 24 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 25 | } | ||
| 26 | |||
| 27 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 28 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 29 | } | ||
| 30 | |||
| 31 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 32 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | ||
| 33 | } | ||
| 34 | |||
| 35 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 36 | return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); | ||
| 37 | } | ||
| 38 | |||
| 39 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 40 | return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); | ||
| 41 | } | ||
| 42 | |||
| 43 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 44 | return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); | ||
| 45 | } | ||
| 46 | |||
| 47 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 48 | return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); | ||
| 49 | } | ||
| 50 | |||
| 51 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 52 | return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); | ||
| 53 | } | ||
| 54 | |||
| 55 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 56 | return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); | ||
| 57 | } | ||
| 58 | |||
| 59 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 60 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 64 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 68 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 72 | return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); | ||
| 73 | } | ||
| 74 | |||
| 75 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 76 | return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 80 | return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 84 | return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 88 | return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 92 | return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { | ||
| 96 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index) { | ||
| 100 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { | ||
| 104 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 108 | return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 112 | return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 116 | return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitCompositeConstructF64x2(EmitContext&) { | ||
| 120 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitCompositeConstructF64x3(EmitContext&) { | ||
| 124 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitCompositeConstructF64x4(EmitContext&) { | ||
| 128 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitCompositeExtractF64x2(EmitContext&) { | ||
| 132 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitCompositeExtractF64x3(EmitContext&) { | ||
| 136 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitCompositeExtractF64x4(EmitContext&) { | ||
| 140 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 144 | return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 148 | return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 152 | return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); | ||
| 153 | } | ||
| 154 | |||
| 155 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp new file mode 100644 index 000000000..fb8c02a77 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -0,0 +1,505 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | #include <utility> | ||
| 7 | |||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 9 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::SPIRV { | ||
| 12 | namespace { | ||
| 13 | struct AttrInfo { | ||
| 14 | Id pointer; | ||
| 15 | Id id; | ||
| 16 | bool needs_cast; | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { | ||
| 20 | const AttributeType type{ctx.runtime_info.generic_input_types.at(index)}; | ||
| 21 | switch (type) { | ||
| 22 | case AttributeType::Float: | ||
| 23 | return AttrInfo{ctx.input_f32, ctx.F32[1], false}; | ||
| 24 | case AttributeType::UnsignedInt: | ||
| 25 | return AttrInfo{ctx.input_u32, ctx.U32[1], true}; | ||
| 26 | case AttributeType::SignedInt: | ||
| 27 | return AttrInfo{ctx.input_s32, ctx.TypeInt(32, true), true}; | ||
| 28 | case AttributeType::Disabled: | ||
| 29 | return std::nullopt; | ||
| 30 | } | ||
| 31 | throw InvalidArgument("Invalid attribute type {}", type); | ||
| 32 | } | ||
| 33 | |||
| 34 | template <typename... Args> | ||
| 35 | Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { | ||
| 36 | switch (ctx.stage) { | ||
| 37 | case Stage::TessellationControl: | ||
| 38 | case Stage::TessellationEval: | ||
| 39 | case Stage::Geometry: | ||
| 40 | return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); | ||
| 41 | default: | ||
| 42 | return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename... Args> | ||
| 47 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { | ||
| 48 | if (ctx.stage == Stage::TessellationControl) { | ||
| 49 | const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.invocation_id)}; | ||
| 50 | return ctx.OpAccessChain(result_type, base, invocation_id, std::forward<Args>(args)...); | ||
| 51 | } else { | ||
| 52 | return ctx.OpAccessChain(result_type, base, std::forward<Args>(args)...); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | struct OutAttr { | ||
| 57 | OutAttr(Id pointer_) : pointer{pointer_} {} | ||
| 58 | OutAttr(Id pointer_, Id type_) : pointer{pointer_}, type{type_} {} | ||
| 59 | |||
| 60 | Id pointer{}; | ||
| 61 | Id type{}; | ||
| 62 | }; | ||
| 63 | |||
| 64 | std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | ||
| 65 | if (IR::IsGeneric(attr)) { | ||
| 66 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 67 | const u32 element{IR::GenericAttributeElement(attr)}; | ||
| 68 | const GenericElementInfo& info{ctx.output_generics.at(index).at(element)}; | ||
| 69 | if (info.num_components == 1) { | ||
| 70 | return info.id; | ||
| 71 | } else { | ||
| 72 | const u32 index_element{element - info.first_element}; | ||
| 73 | const Id index_id{ctx.Const(index_element)}; | ||
| 74 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | switch (attr) { | ||
| 78 | case IR::Attribute::PointSize: | ||
| 79 | return ctx.output_point_size; | ||
| 80 | case IR::Attribute::PositionX: | ||
| 81 | case IR::Attribute::PositionY: | ||
| 82 | case IR::Attribute::PositionZ: | ||
| 83 | case IR::Attribute::PositionW: { | ||
| 84 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 85 | const Id element_id{ctx.Const(element)}; | ||
| 86 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); | ||
| 87 | } | ||
| 88 | case IR::Attribute::ClipDistance0: | ||
| 89 | case IR::Attribute::ClipDistance1: | ||
| 90 | case IR::Attribute::ClipDistance2: | ||
| 91 | case IR::Attribute::ClipDistance3: | ||
| 92 | case IR::Attribute::ClipDistance4: | ||
| 93 | case IR::Attribute::ClipDistance5: | ||
| 94 | case IR::Attribute::ClipDistance6: | ||
| 95 | case IR::Attribute::ClipDistance7: { | ||
| 96 | const u32 base{static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 97 | const u32 index{static_cast<u32>(attr) - base}; | ||
| 98 | const Id clip_num{ctx.Const(index)}; | ||
| 99 | return OutputAccessChain(ctx, ctx.output_f32, ctx.clip_distances, clip_num); | ||
| 100 | } | ||
| 101 | case IR::Attribute::Layer: | ||
| 102 | if (ctx.profile.support_viewport_index_layer_non_geometry || | ||
| 103 | ctx.stage == Shader::Stage::Geometry) { | ||
| 104 | return OutAttr{ctx.layer, ctx.U32[1]}; | ||
| 105 | } | ||
| 106 | return std::nullopt; | ||
| 107 | case IR::Attribute::ViewportIndex: | ||
| 108 | if (ctx.profile.support_viewport_index_layer_non_geometry || | ||
| 109 | ctx.stage == Shader::Stage::Geometry) { | ||
| 110 | return OutAttr{ctx.viewport_index, ctx.U32[1]}; | ||
| 111 | } | ||
| 112 | return std::nullopt; | ||
| 113 | case IR::Attribute::ViewportMask: | ||
| 114 | if (!ctx.profile.support_viewport_mask) { | ||
| 115 | return std::nullopt; | ||
| 116 | } | ||
| 117 | return OutAttr{ctx.OpAccessChain(ctx.output_u32, ctx.viewport_mask, ctx.u32_zero_value), | ||
| 118 | ctx.U32[1]}; | ||
| 119 | default: | ||
| 120 | throw NotImplementedException("Write attribute {}", attr); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, | ||
| 125 | const IR::Value& binding, const IR::Value& offset) { | ||
| 126 | if (!binding.IsImmediate()) { | ||
| 127 | throw NotImplementedException("Constant buffer indexing"); | ||
| 128 | } | ||
| 129 | const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||
| 130 | const Id uniform_type{ctx.uniform_types.*member_ptr}; | ||
| 131 | if (!offset.IsImmediate()) { | ||
| 132 | Id index{ctx.Def(offset)}; | ||
| 133 | if (element_size > 1) { | ||
| 134 | const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 135 | const Id shift{ctx.Const(log2_element_size)}; | ||
| 136 | index = ctx.OpShiftRightArithmetic(ctx.U32[1], index, shift); | ||
| 137 | } | ||
| 138 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; | ||
| 139 | return ctx.OpLoad(result_type, access_chain); | ||
| 140 | } | ||
| 141 | // Hardware has been shown to read the aligned offset (e.g. LDC.U32 at byte 6 reads offset 4) | ||
| 142 | const Id imm_offset{ctx.Const(offset.U32() / element_size)}; | ||
| 143 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; | ||
| 144 | return ctx.OpLoad(result_type, access_chain); | ||
| 145 | } | ||
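The comment above documents that hardware truncates unaligned constant-buffer offsets to the element's natural alignment; the immediate path reproduces that with integer division. Worked out on the CPU:

    #include <cstdint>

    // Integer division truncates toward the aligned element, so LDC.U32 at
    // byte offset 6 lands on element 1, i.e. the u32 occupying bytes 4..7.
    constexpr std::uint32_t AlignedElement(std::uint32_t byte_offset, std::uint32_t element_size) {
        return byte_offset / element_size;
    }

    static_assert(AlignedElement(6, sizeof(std::uint32_t)) == 1);
    static_assert(AlignedElement(6, sizeof(std::uint32_t)) * sizeof(std::uint32_t) == 4);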
| 146 | |||
| 147 | Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 148 | return GetCbuf(ctx, ctx.U32[1], &UniformDefinitions::U32, sizeof(u32), binding, offset); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id GetCbufU32x4(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 152 | return GetCbuf(ctx, ctx.U32[4], &UniformDefinitions::U32x4, sizeof(u32[4]), binding, offset); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 index_offset) { | ||
| 156 | if (offset.IsImmediate()) { | ||
| 157 | const u32 element{(offset.U32() / 4) % 4 + index_offset}; | ||
| 158 | return ctx.OpCompositeExtract(ctx.U32[1], vector, element); | ||
| 159 | } | ||
| 160 | const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | ||
| 161 | Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; | ||
| 162 | if (index_offset > 0) { | ||
| 163 | element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); | ||
| 164 | } | ||
| 165 | return ctx.OpVectorExtractDynamic(ctx.U32[1], vector, element); | ||
| 166 | } | ||
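GetCbufElement maps a byte offset to a lane of the fetched uvec4: immediates via (offset / 4) % 4 and dynamic offsets via the equivalent (offset >> 2) & 3. A small check of that mapping:

    #include <cstdint>

    // Lane selection within a 16-byte uvec4 slot of the constant buffer.
    constexpr std::uint32_t Lane(std::uint32_t byte_offset) {
        return (byte_offset / 4) % 4; // same as (byte_offset >> 2) & 3
    }

    static_assert(Lane(0) == 0);  // bytes 0..3   -> .x
    static_assert(Lane(12) == 3); // bytes 12..15 -> .w
    static_assert(Lane(28) == 3); // bytes 28..31 -> .w of the next uvec4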
| 167 | } // Anonymous namespace | ||
| 168 | |||
| 169 | void EmitGetRegister(EmitContext&) { | ||
| 170 | throw LogicError("Unreachable instruction"); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitSetRegister(EmitContext&) { | ||
| 174 | throw LogicError("Unreachable instruction"); | ||
| 175 | } | ||
| 176 | |||
| 177 | void EmitGetPred(EmitContext&) { | ||
| 178 | throw LogicError("Unreachable instruction"); | ||
| 179 | } | ||
| 180 | |||
| 181 | void EmitSetPred(EmitContext&) { | ||
| 182 | throw LogicError("Unreachable instruction"); | ||
| 183 | } | ||
| 184 | |||
| 185 | void EmitSetGotoVariable(EmitContext&) { | ||
| 186 | throw LogicError("Unreachable instruction"); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitGetGotoVariable(EmitContext&) { | ||
| 190 | throw LogicError("Unreachable instruction"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitSetIndirectBranchVariable(EmitContext&) { | ||
| 194 | throw LogicError("Unreachable instruction"); | ||
| 195 | } | ||
| 196 | |||
| 197 | void EmitGetIndirectBranchVariable(EmitContext&) { | ||
| 198 | throw LogicError("Unreachable instruction"); | ||
| 199 | } | ||
| 200 | |||
| 201 | Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 202 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { | ||
| 203 | const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; | ||
| 204 | return ctx.OpUConvert(ctx.U32[1], load); | ||
| 205 | } | ||
| 206 | Id element{}; | ||
| 207 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 208 | element = GetCbufU32(ctx, binding, offset); | ||
| 209 | } else { | ||
| 210 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 211 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 212 | } | ||
| 213 | const Id bit_offset{ctx.BitOffset8(offset)}; | ||
| 214 | return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); | ||
| 215 | } | ||
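The fallback path carves the byte out of the aligned 32-bit word with OpBitFieldUExtract. BitOffset8 is defined on the context elsewhere in this series; assuming it evaluates to (byte_offset % 4) * 8, the extraction is equivalent to this CPU sketch:

    #include <cstdint>

    // Assumed BitOffset8 semantics: bit position of the byte within its word.
    constexpr std::uint32_t ExtractU8(std::uint32_t word, std::uint32_t byte_offset) {
        const std::uint32_t bit_offset = (byte_offset % 4) * 8;
        return (word >> bit_offset) & 0xffu; // OpBitFieldUExtract, width 8
    }

    static_assert(ExtractU8(0xddccbbaau, 2) == 0xccu); // third byte of the word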
| 216 | |||
| 217 | Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 218 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int8) { | ||
| 219 | const Id load{GetCbuf(ctx, ctx.S8, &UniformDefinitions::S8, sizeof(s8), binding, offset)}; | ||
| 220 | return ctx.OpSConvert(ctx.U32[1], load); | ||
| 221 | } | ||
| 222 | Id element{}; | ||
| 223 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 224 | element = GetCbufU32(ctx, binding, offset); | ||
| 225 | } else { | ||
| 226 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 227 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 228 | } | ||
| 229 | const Id bit_offset{ctx.BitOffset8(offset)}; | ||
| 230 | return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(8u)); | ||
| 231 | } | ||
| 232 | |||
| 233 | Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 234 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { | ||
| 235 | const Id load{ | ||
| 236 | GetCbuf(ctx, ctx.U16, &UniformDefinitions::U16, sizeof(u16), binding, offset)}; | ||
| 237 | return ctx.OpUConvert(ctx.U32[1], load); | ||
| 238 | } | ||
| 239 | Id element{}; | ||
| 240 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 241 | element = GetCbufU32(ctx, binding, offset); | ||
| 242 | } else { | ||
| 243 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 244 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 245 | } | ||
| 246 | const Id bit_offset{ctx.BitOffset16(offset)}; | ||
| 247 | return ctx.OpBitFieldUExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); | ||
| 248 | } | ||
| 249 | |||
| 250 | Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 251 | if (ctx.profile.support_descriptor_aliasing && ctx.profile.support_int16) { | ||
| 252 | const Id load{ | ||
| 253 | GetCbuf(ctx, ctx.S16, &UniformDefinitions::S16, sizeof(s16), binding, offset)}; | ||
| 254 | return ctx.OpSConvert(ctx.U32[1], load); | ||
| 255 | } | ||
| 256 | Id element{}; | ||
| 257 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 258 | element = GetCbufU32(ctx, binding, offset); | ||
| 259 | } else { | ||
| 260 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 261 | element = GetCbufElement(ctx, vector, offset, 0u); | ||
| 262 | } | ||
| 263 | const Id bit_offset{ctx.BitOffset16(offset)}; | ||
| 264 | return ctx.OpBitFieldSExtract(ctx.U32[1], element, bit_offset, ctx.Const(16u)); | ||
| 265 | } | ||
| 266 | |||
| 267 | Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 268 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 269 | return GetCbufU32(ctx, binding, offset); | ||
| 270 | } else { | ||
| 271 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 272 | return GetCbufElement(ctx, vector, offset, 0u); | ||
| 273 | } | ||
| 274 | } | ||
| 275 | |||
| 276 | Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 277 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 278 | return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset); | ||
| 279 | } else { | ||
| 280 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 281 | return ctx.OpBitcast(ctx.F32[1], GetCbufElement(ctx, vector, offset, 0u)); | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 286 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 287 | return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, | ||
| 288 | offset); | ||
| 289 | } else { | ||
| 290 | const Id vector{GetCbufU32x4(ctx, binding, offset)}; | ||
| 291 | return ctx.OpCompositeConstruct(ctx.U32[2], GetCbufElement(ctx, vector, offset, 0u), | ||
| 292 | GetCbufElement(ctx, vector, offset, 1u)); | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | ||
| 297 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 298 | if (IR::IsGeneric(attr)) { | ||
| 299 | const u32 index{IR::GenericAttributeIndex(attr)}; | ||
| 300 | const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; | ||
| 301 | if (!type) { | ||
| 302 | // Attribute is disabled | ||
| 303 | return ctx.Const(element == 3 ? 1.0f : 0.0f); | ||
| 304 | } | ||
| 305 | if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { | ||
| 306 | // Varying component is not written | ||
| 307 | return ctx.Const(element == 3 ? 1.0f : 0.0f); | ||
| 308 | } | ||
| 309 | const Id generic_id{ctx.input_generics.at(index)}; | ||
| 310 | const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; | ||
| 311 | const Id value{ctx.OpLoad(type->id, pointer)}; | ||
| 312 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; | ||
| 313 | } | ||
| 314 | switch (attr) { | ||
| 315 | case IR::Attribute::PrimitiveId: | ||
| 316 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); | ||
| 317 | case IR::Attribute::PositionX: | ||
| 318 | case IR::Attribute::PositionY: | ||
| 319 | case IR::Attribute::PositionZ: | ||
| 320 | case IR::Attribute::PositionW: | ||
| 321 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, | ||
| 322 | ctx.Const(element))); | ||
| 323 | case IR::Attribute::InstanceId: | ||
| 324 | if (ctx.profile.support_vertex_instance_id) { | ||
| 325 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); | ||
| 326 | } else { | ||
| 327 | const Id index{ctx.OpLoad(ctx.U32[1], ctx.instance_index)}; | ||
| 328 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_instance)}; | ||
| 329 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||
| 330 | } | ||
| 331 | case IR::Attribute::VertexId: | ||
| 332 | if (ctx.profile.support_vertex_instance_id) { | ||
| 333 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.vertex_id)); | ||
| 334 | } else { | ||
| 335 | const Id index{ctx.OpLoad(ctx.U32[1], ctx.vertex_index)}; | ||
| 336 | const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||
| 337 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||
| 338 | } | ||
| 339 | case IR::Attribute::FrontFace: | ||
| 340 | return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | ||
| 341 | ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); | ||
| 342 | case IR::Attribute::PointSpriteS: | ||
| 343 | return ctx.OpLoad(ctx.F32[1], | ||
| 344 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); | ||
| 345 | case IR::Attribute::PointSpriteT: | ||
| 346 | return ctx.OpLoad(ctx.F32[1], | ||
| 347 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.Const(1U))); | ||
| 348 | case IR::Attribute::TessellationEvaluationPointU: | ||
| 349 | return ctx.OpLoad(ctx.F32[1], | ||
| 350 | ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.u32_zero_value)); | ||
| 351 | case IR::Attribute::TessellationEvaluationPointV: | ||
| 352 | return ctx.OpLoad(ctx.F32[1], | ||
| 353 | ctx.OpAccessChain(ctx.input_f32, ctx.tess_coord, ctx.Const(1U))); | ||
| 355 | default: | ||
| 356 | throw NotImplementedException("Read attribute {}", attr); | ||
| 357 | } | ||
| 358 | } | ||
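The InstanceId and VertexId fallbacks above lean on a Vulkan rule: InstanceIndex and VertexIndex already include the draw's base values, so the raw IDs are recovered by subtraction. A minimal host-side restatement (names illustrative):

    #include <cstdint>

    // Vulkan: InstanceIndex == base_instance + raw_instance_id, so the raw
    // hardware-style ID the shader expects is the difference.
    static uint32_t RawInstanceId(uint32_t instance_index, uint32_t base_instance) {
        return instance_index - base_instance; // matches the OpISub above
    }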
| 359 | |||
| 360 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { | ||
| 361 | const std::optional<OutAttr> output{OutputAttrPointer(ctx, attr)}; | ||
| 362 | if (!output) { | ||
| 363 | return; | ||
| 364 | } | ||
| 365 | if (Sirit::ValidId(output->type)) { | ||
| 366 | value = ctx.OpBitcast(output->type, value); | ||
| 367 | } | ||
| 368 | ctx.OpStore(output->pointer, value); | ||
| 369 | } | ||
| 370 | |||
| 371 | Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { | ||
| 372 | switch (ctx.stage) { | ||
| 373 | case Stage::TessellationControl: | ||
| 374 | case Stage::TessellationEval: | ||
| 375 | case Stage::Geometry: | ||
| 376 | return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); | ||
| 377 | default: | ||
| 378 | return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); | ||
| 379 | } | ||
| 380 | } | ||
| 381 | |||
| 382 | void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { | ||
| 383 | ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); | ||
| 384 | } | ||
| 385 | |||
| 386 | Id EmitGetPatch(EmitContext& ctx, IR::Patch patch) { | ||
| 387 | if (!IR::IsGeneric(patch)) { | ||
| 388 | throw NotImplementedException("Non-generic patch load"); | ||
| 389 | } | ||
| 390 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 391 | const Id element{ctx.Const(IR::GenericPatchElement(patch))}; | ||
| 392 | const Id type{ctx.stage == Stage::TessellationControl ? ctx.output_f32 : ctx.input_f32}; | ||
| 393 | const Id pointer{ctx.OpAccessChain(type, ctx.patches.at(index), element)}; | ||
| 394 | return ctx.OpLoad(ctx.F32[1], pointer); | ||
| 395 | } | ||
| 396 | |||
| 397 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) { | ||
| 398 | const Id pointer{[&] { | ||
| 399 | if (IR::IsGeneric(patch)) { | ||
| 400 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 401 | const Id element{ctx.Const(IR::GenericPatchElement(patch))}; | ||
| 402 | return ctx.OpAccessChain(ctx.output_f32, ctx.patches.at(index), element); | ||
| 403 | } | ||
| 404 | switch (patch) { | ||
| 405 | case IR::Patch::TessellationLodLeft: | ||
| 406 | case IR::Patch::TessellationLodRight: | ||
| 407 | case IR::Patch::TessellationLodTop: | ||
| 408 | case IR::Patch::TessellationLodBottom: { | ||
| 409 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 410 | const Id index_id{ctx.Const(index)}; | ||
| 411 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_outer, index_id); | ||
| 412 | } | ||
| 413 | case IR::Patch::TessellationLodInteriorU: | ||
| 414 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, | ||
| 415 | ctx.u32_zero_value); | ||
| 416 | case IR::Patch::TessellationLodInteriorV: | ||
| 417 | return ctx.OpAccessChain(ctx.output_f32, ctx.output_tess_level_inner, ctx.Const(1u)); | ||
| 418 | default: | ||
| 419 | throw NotImplementedException("Patch {}", patch); | ||
| 420 | } | ||
| 421 | }()}; | ||
| 422 | ctx.OpStore(pointer, value); | ||
| 423 | } | ||
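EmitSetPatch indexes the outer tessellation levels by enum distance, which silently assumes the four TessellationLod* enumerators are declared consecutively. A reduced sketch of that contract:

    #include <cstdint>

    enum class Patch : uint32_t {
        TessellationLodLeft,   // the subtraction below requires these four
        TessellationLodRight,  // to stay consecutive and in this order
        TessellationLodTop,
        TessellationLodBottom,
    };

    static uint32_t OuterLevelIndex(Patch patch) {
        return static_cast<uint32_t>(patch) -
               static_cast<uint32_t>(Patch::TessellationLodLeft);
    }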
| 424 | |||
| 425 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { | ||
| 426 | const Id component_id{ctx.Const(component)}; | ||
| 427 | const Id pointer{ctx.OpAccessChain(ctx.output_f32, ctx.frag_color.at(index), component_id)}; | ||
| 428 | ctx.OpStore(pointer, value); | ||
| 429 | } | ||
| 430 | |||
| 431 | void EmitSetSampleMask(EmitContext& ctx, Id value) { | ||
| 432 | ctx.OpStore(ctx.sample_mask, value); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | ||
| 436 | ctx.OpStore(ctx.frag_depth, value); | ||
| 437 | } | ||
| 438 | |||
| 439 | void EmitGetZFlag(EmitContext&) { | ||
| 440 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 441 | } | ||
| 442 | |||
| 443 | void EmitGetSFlag(EmitContext&) { | ||
| 444 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 445 | } | ||
| 446 | |||
| 447 | void EmitGetCFlag(EmitContext&) { | ||
| 448 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 449 | } | ||
| 450 | |||
| 451 | void EmitGetOFlag(EmitContext&) { | ||
| 452 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 453 | } | ||
| 454 | |||
| 455 | void EmitSetZFlag(EmitContext&) { | ||
| 456 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 457 | } | ||
| 458 | |||
| 459 | void EmitSetSFlag(EmitContext&) { | ||
| 460 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 461 | } | ||
| 462 | |||
| 463 | void EmitSetCFlag(EmitContext&) { | ||
| 464 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 465 | } | ||
| 466 | |||
| 467 | void EmitSetOFlag(EmitContext&) { | ||
| 468 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 469 | } | ||
| 470 | |||
| 471 | Id EmitWorkgroupId(EmitContext& ctx) { | ||
| 472 | return ctx.OpLoad(ctx.U32[3], ctx.workgroup_id); | ||
| 473 | } | ||
| 474 | |||
| 475 | Id EmitLocalInvocationId(EmitContext& ctx) { | ||
| 476 | return ctx.OpLoad(ctx.U32[3], ctx.local_invocation_id); | ||
| 477 | } | ||
| 478 | |||
| 479 | Id EmitInvocationId(EmitContext& ctx) { | ||
| 480 | return ctx.OpLoad(ctx.U32[1], ctx.invocation_id); | ||
| 481 | } | ||
| 482 | |||
| 483 | Id EmitSampleId(EmitContext& ctx) { | ||
| 484 | return ctx.OpLoad(ctx.U32[1], ctx.sample_id); | ||
| 485 | } | ||
| 486 | |||
| 487 | Id EmitIsHelperInvocation(EmitContext& ctx) { | ||
| 488 | return ctx.OpLoad(ctx.U1, ctx.is_helper_invocation); | ||
| 489 | } | ||
| 490 | |||
| 491 | Id EmitYDirection(EmitContext& ctx) { | ||
| 492 | return ctx.Const(ctx.runtime_info.y_negate ? -1.0f : 1.0f); | ||
| 493 | } | ||
| 494 | |||
| 495 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset) { | ||
| 496 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 497 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 498 | } | ||
| 499 | |||
| 500 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value) { | ||
| 501 | const Id pointer{ctx.OpAccessChain(ctx.private_u32, ctx.local_memory, word_offset)}; | ||
| 502 | ctx.OpStore(pointer, value); | ||
| 503 | } | ||
| 504 | |||
| 505 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp new file mode 100644 index 000000000..d33486f28 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_control_flow.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | void EmitJoin(EmitContext&) { | ||
| 11 | throw NotImplementedException("Join shouldn't be emitted"); | ||
| 12 | } | ||
| 13 | |||
| 14 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 15 | if (ctx.profile.support_demote_to_helper_invocation) { | ||
| 16 | ctx.OpDemoteToHelperInvocationEXT(); | ||
| 17 | } else { | ||
| 18 | const Id kill_label{ctx.OpLabel()}; | ||
| 19 | const Id impossible_label{ctx.OpLabel()}; | ||
| 20 | ctx.OpSelectionMerge(impossible_label, spv::SelectionControlMask::MaskNone); | ||
| 21 | ctx.OpBranchConditional(ctx.true_value, kill_label, impossible_label); | ||
| 22 | ctx.AddLabel(kill_label); | ||
| 23 | ctx.OpKill(); | ||
| 24 | ctx.AddLabel(impossible_label); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp new file mode 100644 index 000000000..fd42b7a16 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | |||
| @@ -0,0 +1,269 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id ExtractU16(EmitContext& ctx, Id value) { | ||
| 11 | if (ctx.profile.support_int16) { | ||
| 12 | return ctx.OpUConvert(ctx.U16, value); | ||
| 13 | } else { | ||
| 14 | return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); | ||
| 15 | } | ||
| 16 | } | ||
| 17 | |||
| 18 | Id ExtractS16(EmitContext& ctx, Id value) { | ||
| 19 | if (ctx.profile.support_int16) { | ||
| 20 | return ctx.OpSConvert(ctx.S16, value); | ||
| 21 | } else { | ||
| 22 | return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(16u)); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | Id ExtractU8(EmitContext& ctx, Id value) { | ||
| 27 | if (ctx.profile.support_int8) { | ||
| 28 | return ctx.OpUConvert(ctx.U8, value); | ||
| 29 | } else { | ||
| 30 | return ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | Id ExtractS8(EmitContext& ctx, Id value) { | ||
| 35 | if (ctx.profile.support_int8) { | ||
| 36 | return ctx.OpSConvert(ctx.S8, value); | ||
| 37 | } else { | ||
| 38 | return ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.u32_zero_value, ctx.Const(8u)); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
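When the host lacks native 8/16-bit integers, the helpers above fall back to OpBitFieldSExtract/OpBitFieldUExtract on a full 32-bit word. The equivalent scalar arithmetic, as a host-side sketch:

    #include <cstdint>

    // Extract the low `bits` (0 < bits < 32) of a word and sign-extend them,
    // with no narrow integer type involved. The right shift is arithmetic
    // (well defined for signed operands since C++20), matching
    // OpBitFieldSExtract.
    static int32_t SignExtend(uint32_t value, uint32_t bits) {
        const uint32_t shift = 32 - bits;
        return static_cast<int32_t>(value << shift) >> shift;
    }
    // SignExtend(v, 16) mirrors ExtractS16 and SignExtend(v, 8) mirrors
    // ExtractS8; masking with (1u << bits) - 1 gives the unsigned variants.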
| 42 | |||
| 43 | Id EmitConvertS16F16(EmitContext& ctx, Id value) { | ||
| 44 | if (ctx.profile.support_int16) { | ||
| 45 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 46 | } else { | ||
| 47 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | Id EmitConvertS16F32(EmitContext& ctx, Id value) { | ||
| 52 | if (ctx.profile.support_int16) { | ||
| 53 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 54 | } else { | ||
| 55 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | Id EmitConvertS16F64(EmitContext& ctx, Id value) { | ||
| 60 | if (ctx.profile.support_int16) { | ||
| 61 | return ctx.OpSConvert(ctx.U32[1], ctx.OpConvertFToS(ctx.U16, value)); | ||
| 62 | } else { | ||
| 63 | return ExtractS16(ctx, ctx.OpConvertFToS(ctx.U32[1], value)); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitConvertS32F16(EmitContext& ctx, Id value) { | ||
| 68 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitConvertS32F32(EmitContext& ctx, Id value) { | ||
| 72 | if (ctx.profile.has_broken_signed_operations) { | ||
| 73 | return ctx.OpBitcast(ctx.U32[1], ctx.OpConvertFToS(ctx.S32[1], value)); | ||
| 74 | } else { | ||
| 75 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitConvertS32F64(EmitContext& ctx, Id value) { | ||
| 80 | return ctx.OpConvertFToS(ctx.U32[1], value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitConvertS64F16(EmitContext& ctx, Id value) { | ||
| 84 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitConvertS64F32(EmitContext& ctx, Id value) { | ||
| 88 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitConvertS64F64(EmitContext& ctx, Id value) { | ||
| 92 | return ctx.OpConvertFToS(ctx.U64, value); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitConvertU16F16(EmitContext& ctx, Id value) { | ||
| 96 | if (ctx.profile.support_int16) { | ||
| 97 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 98 | } else { | ||
| 99 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitConvertU16F32(EmitContext& ctx, Id value) { | ||
| 104 | if (ctx.profile.support_int16) { | ||
| 105 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 106 | } else { | ||
| 107 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitConvertU16F64(EmitContext& ctx, Id value) { | ||
| 112 | if (ctx.profile.support_int16) { | ||
| 113 | return ctx.OpUConvert(ctx.U32[1], ctx.OpConvertFToU(ctx.U16, value)); | ||
| 114 | } else { | ||
| 115 | return ExtractU16(ctx, ctx.OpConvertFToU(ctx.U32[1], value)); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitConvertU32F16(EmitContext& ctx, Id value) { | ||
| 120 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitConvertU32F32(EmitContext& ctx, Id value) { | ||
| 124 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitConvertU32F64(EmitContext& ctx, Id value) { | ||
| 128 | return ctx.OpConvertFToU(ctx.U32[1], value); | ||
| 129 | } | ||
| 130 | |||
| 131 | Id EmitConvertU64F16(EmitContext& ctx, Id value) { | ||
| 132 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | Id EmitConvertU64F32(EmitContext& ctx, Id value) { | ||
| 136 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 137 | } | ||
| 138 | |||
| 139 | Id EmitConvertU64F64(EmitContext& ctx, Id value) { | ||
| 140 | return ctx.OpConvertFToU(ctx.U64, value); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitConvertU64U32(EmitContext& ctx, Id value) { | ||
| 144 | return ctx.OpUConvert(ctx.U64, value); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitConvertU32U64(EmitContext& ctx, Id value) { | ||
| 148 | return ctx.OpUConvert(ctx.U32[1], value); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitConvertF16F32(EmitContext& ctx, Id value) { | ||
| 152 | return ctx.OpFConvert(ctx.F16[1], value); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id EmitConvertF32F16(EmitContext& ctx, Id value) { | ||
| 156 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitConvertF32F64(EmitContext& ctx, Id value) { | ||
| 160 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 161 | } | ||
| 162 | |||
| 163 | Id EmitConvertF64F32(EmitContext& ctx, Id value) { | ||
| 164 | return ctx.OpFConvert(ctx.F64[1], value); | ||
| 165 | } | ||
| 166 | |||
| 167 | Id EmitConvertF16S8(EmitContext& ctx, Id value) { | ||
| 168 | return ctx.OpConvertSToF(ctx.F16[1], ExtractS8(ctx, value)); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitConvertF16S16(EmitContext& ctx, Id value) { | ||
| 172 | return ctx.OpConvertSToF(ctx.F16[1], ExtractS16(ctx, value)); | ||
| 173 | } | ||
| 174 | |||
| 175 | Id EmitConvertF16S32(EmitContext& ctx, Id value) { | ||
| 176 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitConvertF16S64(EmitContext& ctx, Id value) { | ||
| 180 | return ctx.OpConvertSToF(ctx.F16[1], value); | ||
| 181 | } | ||
| 182 | |||
| 183 | Id EmitConvertF16U8(EmitContext& ctx, Id value) { | ||
| 184 | return ctx.OpConvertUToF(ctx.F16[1], ExtractU8(ctx, value)); | ||
| 185 | } | ||
| 186 | |||
| 187 | Id EmitConvertF16U16(EmitContext& ctx, Id value) { | ||
| 188 | return ctx.OpConvertUToF(ctx.F16[1], ExtractU16(ctx, value)); | ||
| 189 | } | ||
| 190 | |||
| 191 | Id EmitConvertF16U32(EmitContext& ctx, Id value) { | ||
| 192 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 193 | } | ||
| 194 | |||
| 195 | Id EmitConvertF16U64(EmitContext& ctx, Id value) { | ||
| 196 | return ctx.OpConvertUToF(ctx.F16[1], value); | ||
| 197 | } | ||
| 198 | |||
| 199 | Id EmitConvertF32S8(EmitContext& ctx, Id value) { | ||
| 200 | return ctx.OpConvertSToF(ctx.F32[1], ExtractS8(ctx, value)); | ||
| 201 | } | ||
| 202 | |||
| 203 | Id EmitConvertF32S16(EmitContext& ctx, Id value) { | ||
| 204 | return ctx.OpConvertSToF(ctx.F32[1], ExtractS16(ctx, value)); | ||
| 205 | } | ||
| 206 | |||
| 207 | Id EmitConvertF32S32(EmitContext& ctx, Id value) { | ||
| 208 | if (ctx.profile.has_broken_signed_operations) { | ||
| 209 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 210 | } | ||
| 211 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 212 | } | ||
| 213 | |||
| 214 | Id EmitConvertF32S64(EmitContext& ctx, Id value) { | ||
| 215 | return ctx.OpConvertSToF(ctx.F32[1], value); | ||
| 216 | } | ||
| 217 | |||
| 218 | Id EmitConvertF32U8(EmitContext& ctx, Id value) { | ||
| 219 | return ctx.OpConvertUToF(ctx.F32[1], ExtractU8(ctx, value)); | ||
| 220 | } | ||
| 221 | |||
| 222 | Id EmitConvertF32U16(EmitContext& ctx, Id value) { | ||
| 223 | return ctx.OpConvertUToF(ctx.F32[1], ExtractU16(ctx, value)); | ||
| 224 | } | ||
| 225 | |||
| 226 | Id EmitConvertF32U32(EmitContext& ctx, Id value) { | ||
| 227 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 228 | } | ||
| 229 | |||
| 230 | Id EmitConvertF32U64(EmitContext& ctx, Id value) { | ||
| 231 | return ctx.OpConvertUToF(ctx.F32[1], value); | ||
| 232 | } | ||
| 233 | |||
| 234 | Id EmitConvertF64S8(EmitContext& ctx, Id value) { | ||
| 235 | return ctx.OpConvertSToF(ctx.F64[1], ExtractS8(ctx, value)); | ||
| 236 | } | ||
| 237 | |||
| 238 | Id EmitConvertF64S16(EmitContext& ctx, Id value) { | ||
| 239 | return ctx.OpConvertSToF(ctx.F64[1], ExtractS16(ctx, value)); | ||
| 240 | } | ||
| 241 | |||
| 242 | Id EmitConvertF64S32(EmitContext& ctx, Id value) { | ||
| 243 | if (ctx.profile.has_broken_signed_operations) { | ||
| 244 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 245 | } | ||
| 246 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitConvertF64S64(EmitContext& ctx, Id value) { | ||
| 250 | return ctx.OpConvertSToF(ctx.F64[1], value); | ||
| 251 | } | ||
| 252 | |||
| 253 | Id EmitConvertF64U8(EmitContext& ctx, Id value) { | ||
| 254 | return ctx.OpConvertUToF(ctx.F64[1], ExtractU8(ctx, value)); | ||
| 255 | } | ||
| 256 | |||
| 257 | Id EmitConvertF64U16(EmitContext& ctx, Id value) { | ||
| 258 | return ctx.OpConvertUToF(ctx.F64[1], ExtractU16(ctx, value)); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitConvertF64U32(EmitContext& ctx, Id value) { | ||
| 262 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 263 | } | ||
| 264 | |||
| 265 | Id EmitConvertF64U64(EmitContext& ctx, Id value) { | ||
| 266 | return ctx.OpConvertUToF(ctx.F64[1], value); | ||
| 267 | } | ||
| 268 | |||
| 269 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp new file mode 100644 index 000000000..61cf25f9c --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp | |||
| @@ -0,0 +1,396 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | ||
| 12 | const auto flags{inst->Flags<IR::FpControl>()}; | ||
| 13 | if (flags.no_contraction) { | ||
| 14 | ctx.Decorate(op, spv::Decoration::NoContraction); | ||
| 15 | } | ||
| 16 | return op; | ||
| 17 | } | ||
| 18 | |||
| 19 | Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) { | ||
| 20 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 21 | return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); | ||
| 22 | } else { | ||
| 23 | return ctx.OpFClamp(type, value, zero, one); | ||
| 24 | } | ||
| 25 | } | ||
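Where the profile reports a broken OpFClamp, Clamp lowers to the min/max pair. The two forms agree for ordinary values; NaN propagation is where drivers diverge, which is presumably what the workaround flag exists for. Host-side equivalent of the lowered form:

    #include <algorithm>

    static float ClampLowered(float value, float lo, float hi) {
        return std::min(std::max(value, lo), hi); // FMin(FMax(x, lo), hi)
    }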
| 26 | |||
| 27 | Id FPOrdNotEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 28 | if (ctx.profile.ignore_nan_fp_comparisons) { | ||
| 29 | const Id comp{ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs)}; | ||
| 30 | const Id lhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, lhs))}; | ||
| 31 | const Id rhs_not_nan{ctx.OpLogicalNot(ctx.U1, ctx.OpIsNan(ctx.U1, rhs))}; | ||
| 32 | return ctx.OpLogicalAnd(ctx.U1, ctx.OpLogicalAnd(ctx.U1, comp, lhs_not_nan), rhs_not_nan); | ||
| 33 | } else { | ||
| 34 | return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | Id FPUnordCompare(Id (EmitContext::*comp_func)(Id, Id, Id), EmitContext& ctx, Id lhs, Id rhs) { | ||
| 39 | if (ctx.profile.ignore_nan_fp_comparisons) { | ||
| 40 | const Id lhs_nan{ctx.OpIsNan(ctx.U1, lhs)}; | ||
| 41 | const Id rhs_nan{ctx.OpIsNan(ctx.U1, rhs)}; | ||
| 42 | const Id comp{(ctx.*comp_func)(ctx.U1, lhs, rhs)}; | ||
| 43 | return ctx.OpLogicalOr(ctx.U1, ctx.OpLogicalOr(ctx.U1, comp, lhs_nan), rhs_nan); | ||
| 44 | } else { | ||
| 45 | return (ctx.*comp_func)(ctx.U1, lhs, rhs); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
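FPUnordCompare rebuilds an unordered comparison from pieces when the host's NaN handling can't be trusted: take the comparison result and OR in "either operand is NaN". In scalar C++ terms:

    #include <cmath>

    // Unordered less-than: true when lhs < rhs or when either side is NaN.
    static bool UnordLessThan(float lhs, float rhs) {
        return (lhs < rhs) || std::isnan(lhs) || std::isnan(rhs);
    }
    // FPOrdNotEqual goes the other way: start from the unordered not-equal
    // and AND with "neither side is NaN".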
| 49 | |||
| 50 | Id EmitFPAbs16(EmitContext& ctx, Id value) { | ||
| 51 | return ctx.OpFAbs(ctx.F16[1], value); | ||
| 52 | } | ||
| 53 | |||
| 54 | Id EmitFPAbs32(EmitContext& ctx, Id value) { | ||
| 55 | return ctx.OpFAbs(ctx.F32[1], value); | ||
| 56 | } | ||
| 57 | |||
| 58 | Id EmitFPAbs64(EmitContext& ctx, Id value) { | ||
| 59 | return ctx.OpFAbs(ctx.F64[1], value); | ||
| 60 | } | ||
| 61 | |||
| 62 | Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 63 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F16[1], a, b)); | ||
| 64 | } | ||
| 65 | |||
| 66 | Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 67 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F32[1], a, b)); | ||
| 68 | } | ||
| 69 | |||
| 70 | Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 71 | return Decorate(ctx, inst, ctx.OpFAdd(ctx.F64[1], a, b)); | ||
| 72 | } | ||
| 73 | |||
| 74 | Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 75 | return Decorate(ctx, inst, ctx.OpFma(ctx.F16[1], a, b, c)); | ||
| 76 | } | ||
| 77 | |||
| 78 | Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 79 | return Decorate(ctx, inst, ctx.OpFma(ctx.F32[1], a, b, c)); | ||
| 80 | } | ||
| 81 | |||
| 82 | Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { | ||
| 83 | return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); | ||
| 84 | } | ||
| 85 | |||
| 86 | Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { | ||
| 87 | return ctx.OpFMax(ctx.F32[1], a, b); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { | ||
| 91 | return ctx.OpFMax(ctx.F64[1], a, b); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { | ||
| 95 | return ctx.OpFMin(ctx.F32[1], a, b); | ||
| 96 | } | ||
| 97 | |||
| 98 | Id EmitFPMin64(EmitContext& ctx, Id a, Id b) { | ||
| 99 | return ctx.OpFMin(ctx.F64[1], a, b); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 103 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F16[1], a, b)); | ||
| 104 | } | ||
| 105 | |||
| 106 | Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 107 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F32[1], a, b)); | ||
| 108 | } | ||
| 109 | |||
| 110 | Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 111 | return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b)); | ||
| 112 | } | ||
| 113 | |||
| 114 | Id EmitFPNeg16(EmitContext& ctx, Id value) { | ||
| 115 | return ctx.OpFNegate(ctx.F16[1], value); | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitFPNeg32(EmitContext& ctx, Id value) { | ||
| 119 | return ctx.OpFNegate(ctx.F32[1], value); | ||
| 120 | } | ||
| 121 | |||
| 122 | Id EmitFPNeg64(EmitContext& ctx, Id value) { | ||
| 123 | return ctx.OpFNegate(ctx.F64[1], value); | ||
| 124 | } | ||
| 125 | |||
| 126 | Id EmitFPSin(EmitContext& ctx, Id value) { | ||
| 127 | return ctx.OpSin(ctx.F32[1], value); | ||
| 128 | } | ||
| 129 | |||
| 130 | Id EmitFPCos(EmitContext& ctx, Id value) { | ||
| 131 | return ctx.OpCos(ctx.F32[1], value); | ||
| 132 | } | ||
| 133 | |||
| 134 | Id EmitFPExp2(EmitContext& ctx, Id value) { | ||
| 135 | return ctx.OpExp2(ctx.F32[1], value); | ||
| 136 | } | ||
| 137 | |||
| 138 | Id EmitFPLog2(EmitContext& ctx, Id value) { | ||
| 139 | return ctx.OpLog2(ctx.F32[1], value); | ||
| 140 | } | ||
| 141 | |||
| 142 | Id EmitFPRecip32(EmitContext& ctx, Id value) { | ||
| 143 | return ctx.OpFDiv(ctx.F32[1], ctx.Const(1.0f), value); | ||
| 144 | } | ||
| 145 | |||
| 146 | Id EmitFPRecip64(EmitContext& ctx, Id value) { | ||
| 147 | return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], f64{1.0}), value); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) { | ||
| 151 | return ctx.OpInverseSqrt(ctx.F32[1], value); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) { | ||
| 155 | return ctx.OpInverseSqrt(ctx.F64[1], value); | ||
| 156 | } | ||
| 157 | |||
| 158 | Id EmitFPSqrt(EmitContext& ctx, Id value) { | ||
| 159 | return ctx.OpSqrt(ctx.F32[1], value); | ||
| 160 | } | ||
| 161 | |||
| 162 | Id EmitFPSaturate16(EmitContext& ctx, Id value) { | ||
| 163 | const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; | ||
| 164 | const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; | ||
| 165 | return Clamp(ctx, ctx.F16[1], value, zero, one); | ||
| 166 | } | ||
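The half-precision constants above are raw bit patterns: 0x3c00 is 1.0 in IEEE-754 binary16 (sign 0, exponent 15, mantissa 0). A quick decoder for normal numbers, enough to verify the literal:

    #include <cstdint>

    static float DecodeF16Normal(uint16_t bits) {
        const uint32_t sign = bits >> 15;
        const uint32_t exponent = (bits >> 10) & 0x1f; // biased by 15
        const uint32_t mantissa = bits & 0x3ff;
        const float value = (1.0f + mantissa / 1024.0f) *
                            static_cast<float>(1u << exponent) / 32768.0f;
        return sign != 0 ? -value : value;
    }
    // DecodeF16Normal(0x3c00) == 1.0f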
| 167 | |||
| 168 | Id EmitFPSaturate32(EmitContext& ctx, Id value) { | ||
| 169 | const Id zero{ctx.Const(f32{0.0})}; | ||
| 170 | const Id one{ctx.Const(f32{1.0})}; | ||
| 171 | return Clamp(ctx, ctx.F32[1], value, zero, one); | ||
| 172 | } | ||
| 173 | |||
| 174 | Id EmitFPSaturate64(EmitContext& ctx, Id value) { | ||
| 175 | const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; | ||
| 176 | const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; | ||
| 177 | return Clamp(ctx, ctx.F64[1], value, zero, one); | ||
| 178 | } | ||
| 179 | |||
| 180 | Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 181 | return Clamp(ctx, ctx.F16[1], value, min_value, max_value); | ||
| 182 | } | ||
| 183 | |||
| 184 | Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 185 | return Clamp(ctx, ctx.F32[1], value, min_value, max_value); | ||
| 186 | } | ||
| 187 | |||
| 188 | Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) { | ||
| 189 | return Clamp(ctx, ctx.F64[1], value, min_value, max_value); | ||
| 190 | } | ||
| 191 | |||
| 192 | Id EmitFPRoundEven16(EmitContext& ctx, Id value) { | ||
| 193 | return ctx.OpRoundEven(ctx.F16[1], value); | ||
| 194 | } | ||
| 195 | |||
| 196 | Id EmitFPRoundEven32(EmitContext& ctx, Id value) { | ||
| 197 | return ctx.OpRoundEven(ctx.F32[1], value); | ||
| 198 | } | ||
| 199 | |||
| 200 | Id EmitFPRoundEven64(EmitContext& ctx, Id value) { | ||
| 201 | return ctx.OpRoundEven(ctx.F64[1], value); | ||
| 202 | } | ||
| 203 | |||
| 204 | Id EmitFPFloor16(EmitContext& ctx, Id value) { | ||
| 205 | return ctx.OpFloor(ctx.F16[1], value); | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitFPFloor32(EmitContext& ctx, Id value) { | ||
| 209 | return ctx.OpFloor(ctx.F32[1], value); | ||
| 210 | } | ||
| 211 | |||
| 212 | Id EmitFPFloor64(EmitContext& ctx, Id value) { | ||
| 213 | return ctx.OpFloor(ctx.F64[1], value); | ||
| 214 | } | ||
| 215 | |||
| 216 | Id EmitFPCeil16(EmitContext& ctx, Id value) { | ||
| 217 | return ctx.OpCeil(ctx.F16[1], value); | ||
| 218 | } | ||
| 219 | |||
| 220 | Id EmitFPCeil32(EmitContext& ctx, Id value) { | ||
| 221 | return ctx.OpCeil(ctx.F32[1], value); | ||
| 222 | } | ||
| 223 | |||
| 224 | Id EmitFPCeil64(EmitContext& ctx, Id value) { | ||
| 225 | return ctx.OpCeil(ctx.F64[1], value); | ||
| 226 | } | ||
| 227 | |||
| 228 | Id EmitFPTrunc16(EmitContext& ctx, Id value) { | ||
| 229 | return ctx.OpTrunc(ctx.F16[1], value); | ||
| 230 | } | ||
| 231 | |||
| 232 | Id EmitFPTrunc32(EmitContext& ctx, Id value) { | ||
| 233 | return ctx.OpTrunc(ctx.F32[1], value); | ||
| 234 | } | ||
| 235 | |||
| 236 | Id EmitFPTrunc64(EmitContext& ctx, Id value) { | ||
| 237 | return ctx.OpTrunc(ctx.F64[1], value); | ||
| 238 | } | ||
| 239 | |||
| 240 | Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 241 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 242 | } | ||
| 243 | |||
| 244 | Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 245 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 246 | } | ||
| 247 | |||
| 248 | Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 249 | return ctx.OpFOrdEqual(ctx.U1, lhs, rhs); | ||
| 250 | } | ||
| 251 | |||
| 252 | Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 253 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 254 | } | ||
| 255 | |||
| 256 | Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 257 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 258 | } | ||
| 259 | |||
| 260 | Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 261 | return FPUnordCompare(&EmitContext::OpFUnordEqual, ctx, lhs, rhs); | ||
| 262 | } | ||
| 263 | |||
| 264 | Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 265 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 266 | } | ||
| 267 | |||
| 268 | Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 269 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 270 | } | ||
| 271 | |||
| 272 | Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 273 | return FPOrdNotEqual(ctx, lhs, rhs); | ||
| 274 | } | ||
| 275 | |||
| 276 | Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 277 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 278 | } | ||
| 279 | |||
| 280 | Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 281 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 282 | } | ||
| 283 | |||
| 284 | Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 285 | return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs); | ||
| 286 | } | ||
| 287 | |||
| 288 | Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 289 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 290 | } | ||
| 291 | |||
| 292 | Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 293 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 294 | } | ||
| 295 | |||
| 296 | Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 297 | return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs); | ||
| 298 | } | ||
| 299 | |||
| 300 | Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 301 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 302 | } | ||
| 303 | |||
| 304 | Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 305 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 306 | } | ||
| 307 | |||
| 308 | Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 309 | return FPUnordCompare(&EmitContext::OpFUnordLessThan, ctx, lhs, rhs); | ||
| 310 | } | ||
| 311 | |||
| 312 | Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 313 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 314 | } | ||
| 315 | |||
| 316 | Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 317 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 318 | } | ||
| 319 | |||
| 320 | Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 321 | return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs); | ||
| 322 | } | ||
| 323 | |||
| 324 | Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 325 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 326 | } | ||
| 327 | |||
| 328 | Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 329 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 330 | } | ||
| 331 | |||
| 332 | Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 333 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThan, ctx, lhs, rhs); | ||
| 334 | } | ||
| 335 | |||
| 336 | Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 337 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 338 | } | ||
| 339 | |||
| 340 | Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 341 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 342 | } | ||
| 343 | |||
| 344 | Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 345 | return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs); | ||
| 346 | } | ||
| 347 | |||
| 348 | Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 349 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 350 | } | ||
| 351 | |||
| 352 | Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 353 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 354 | } | ||
| 355 | |||
| 356 | Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 357 | return FPUnordCompare(&EmitContext::OpFUnordLessThanEqual, ctx, lhs, rhs); | ||
| 358 | } | ||
| 359 | |||
| 360 | Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 361 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 362 | } | ||
| 363 | |||
| 364 | Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 365 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 366 | } | ||
| 367 | |||
| 368 | Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 369 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 370 | } | ||
| 371 | |||
| 372 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 373 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 374 | } | ||
| 375 | |||
| 376 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 377 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 378 | } | ||
| 379 | |||
| 380 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 381 | return FPUnordCompare(&EmitContext::OpFUnordGreaterThanEqual, ctx, lhs, rhs); | ||
| 382 | } | ||
| 383 | |||
| 384 | Id EmitFPIsNan16(EmitContext& ctx, Id value) { | ||
| 385 | return ctx.OpIsNan(ctx.U1, value); | ||
| 386 | } | ||
| 387 | |||
| 388 | Id EmitFPIsNan32(EmitContext& ctx, Id value) { | ||
| 389 | return ctx.OpIsNan(ctx.U1, value); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitFPIsNan64(EmitContext& ctx, Id value) { | ||
| 393 | return ctx.OpIsNan(ctx.U1, value); | ||
| 394 | } | ||
| 395 | |||
| 396 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp new file mode 100644 index 000000000..3588f052b --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -0,0 +1,462 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <boost/container/static_vector.hpp> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::SPIRV { | ||
| 12 | namespace { | ||
| 13 | class ImageOperands { | ||
| 14 | public: | ||
| 15 | explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp, | ||
| 16 | Id lod, const IR::Value& offset) { | ||
| 17 | if (has_bias) { | ||
| 18 | const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 19 | Add(spv::ImageOperandsMask::Bias, bias); | ||
| 20 | } | ||
| 21 | if (has_lod) { | ||
| 22 | const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod}; | ||
| 23 | Add(spv::ImageOperandsMask::Lod, lod_value); | ||
| 24 | } | ||
| 25 | AddOffset(ctx, offset); | ||
| 26 | if (has_lod_clamp) { | ||
| 27 | const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod}; | ||
| 28 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) { | ||
| 33 | if (offset2.IsEmpty()) { | ||
| 34 | if (offset.IsEmpty()) { | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | const std::array values{offset.InstRecursive(), offset2.InstRecursive()}; | ||
| 41 | if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) { | ||
| 42 | LOG_WARNING(Shader_SPIRV, "Not all arguments in PTP are immediate, ignoring"); | ||
| 43 | return; | ||
| 44 | } | ||
| 45 | const IR::Opcode opcode{values[0]->GetOpcode()}; | ||
| 46 | if (opcode != values[1]->GetOpcode() || opcode != IR::Opcode::CompositeConstructU32x4) { | ||
| 47 | throw LogicError("Invalid PTP arguments"); | ||
| 48 | } | ||
| 49 | auto read{[&](unsigned int a, unsigned int b) { return values[a]->Arg(b).U32(); }}; | ||
| 50 | |||
| 51 | const Id offsets{ctx.ConstantComposite( | ||
| 52 | ctx.TypeArray(ctx.U32[2], ctx.Const(4U)), ctx.Const(read(0, 0), read(0, 1)), | ||
| 53 | ctx.Const(read(0, 2), read(0, 3)), ctx.Const(read(1, 0), read(1, 1)), | ||
| 54 | ctx.Const(read(1, 2), read(1, 3)))}; | ||
| 55 | Add(spv::ImageOperandsMask::ConstOffsets, offsets); | ||
| 56 | } | ||
| 57 | |||
| 58 | explicit ImageOperands(Id offset, Id lod, Id ms) { | ||
| 59 | if (Sirit::ValidId(lod)) { | ||
| 60 | Add(spv::ImageOperandsMask::Lod, lod); | ||
| 61 | } | ||
| 62 | if (Sirit::ValidId(offset)) { | ||
| 63 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 64 | } | ||
| 65 | if (Sirit::ValidId(ms)) { | ||
| 66 | Add(spv::ImageOperandsMask::Sample, ms); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, | ||
| 71 | Id offset, Id lod_clamp) { | ||
| 72 | if (!Sirit::ValidId(derivates)) { | ||
| 73 | throw LogicError("Derivates must be present"); | ||
| 74 | } | ||
| 75 | boost::container::static_vector<Id, 3> deriv_x_accum; | ||
| 76 | boost::container::static_vector<Id, 3> deriv_y_accum; | ||
| 77 | for (u32 i = 0; i < num_derivates; ++i) { | ||
| 78 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); | ||
| 79 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); | ||
| 80 | } | ||
| 81 | const Id derivates_X{ctx.OpCompositeConstruct( | ||
| 82 | ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; | ||
| 83 | const Id derivates_Y{ctx.OpCompositeConstruct( | ||
| 84 | ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; | ||
| 85 | Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); | ||
| 86 | if (Sirit::ValidId(offset)) { | ||
| 87 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 88 | } | ||
| 89 | if (has_lod_clamp) { | ||
| 90 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | std::span<const Id> Span() const noexcept { | ||
| 95 | return std::span{operands.data(), operands.size()}; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::optional<spv::ImageOperandsMask> MaskOptional() const noexcept { | ||
| 99 | return mask != spv::ImageOperandsMask{} ? std::make_optional(mask) : std::nullopt; | ||
| 100 | } | ||
| 101 | |||
| 102 | spv::ImageOperandsMask Mask() const noexcept { | ||
| 103 | return mask; | ||
| 104 | } | ||
| 105 | |||
| 106 | private: | ||
| 107 | void AddOffset(EmitContext& ctx, const IR::Value& offset) { | ||
| 108 | if (offset.IsEmpty()) { | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | if (offset.IsImmediate()) { | ||
| 112 | Add(spv::ImageOperandsMask::ConstOffset, ctx.SConst(static_cast<s32>(offset.U32()))); | ||
| 113 | return; | ||
| 114 | } | ||
| 115 | IR::Inst* const inst{offset.InstRecursive()}; | ||
| 116 | if (inst->AreAllArgsImmediates()) { | ||
| 117 | switch (inst->GetOpcode()) { | ||
| 118 | case IR::Opcode::CompositeConstructU32x2: | ||
| 119 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 120 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 121 | static_cast<s32>(inst->Arg(1).U32()))); | ||
| 122 | return; | ||
| 123 | case IR::Opcode::CompositeConstructU32x3: | ||
| 124 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 125 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 126 | static_cast<s32>(inst->Arg(1).U32()), | ||
| 127 | static_cast<s32>(inst->Arg(2).U32()))); | ||
| 128 | return; | ||
| 129 | case IR::Opcode::CompositeConstructU32x4: | ||
| 130 | Add(spv::ImageOperandsMask::ConstOffset, | ||
| 131 | ctx.SConst(static_cast<s32>(inst->Arg(0).U32()), | ||
| 132 | static_cast<s32>(inst->Arg(1).U32()), | ||
| 133 | static_cast<s32>(inst->Arg(2).U32()), | ||
| 134 | static_cast<s32>(inst->Arg(3).U32()))); | ||
| 135 | return; | ||
| 136 | default: | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | } | ||
| 140 | Add(spv::ImageOperandsMask::Offset, ctx.Def(offset)); | ||
| 141 | } | ||
| 142 | |||
| 143 | void Add(spv::ImageOperandsMask new_mask, Id value) { | ||
| 144 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||
| 145 | static_cast<unsigned>(new_mask)); | ||
| 146 | operands.push_back(value); | ||
| 147 | } | ||
| 148 | |||
| 149 | void Add(spv::ImageOperandsMask new_mask, Id value_1, Id value_2) { | ||
| 150 | mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) | | ||
| 151 | static_cast<unsigned>(new_mask)); | ||
| 152 | operands.push_back(value_1); | ||
| 153 | operands.push_back(value_2); | ||
| 154 | } | ||
| 155 | |||
| 156 | boost::container::static_vector<Id, 4> operands; | ||
| 157 | spv::ImageOperandsMask mask{}; | ||
| 158 | }; | ||
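A reduced model of the bookkeeping in ImageOperands: every Add ORs one mask bit and appends the operand Ids, so the call order has to follow the bit order SPIR-V assigns to image operands (Bias 0x1, Lod 0x2, ConstOffset 0x8, Offset 0x10, ...):

    #include <cstdint>
    #include <vector>

    enum class OperandMask : uint32_t { None = 0x0, Bias = 0x1, Lod = 0x2, Offset = 0x10 };

    struct Operands {
        void Add(OperandMask bit, uint32_t id) {
            mask = static_cast<OperandMask>(static_cast<uint32_t>(mask) |
                                            static_cast<uint32_t>(bit));
            operands.push_back(id); // appended in mask-bit order by convention
        }
        OperandMask mask{};
        std::vector<uint32_t> operands;
    };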
| 159 | |||
| 160 | Id Texture(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { | ||
| 161 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; | ||
| 162 | if (def.count > 1) { | ||
| 163 | const Id pointer{ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(index))}; | ||
| 164 | return ctx.OpLoad(def.sampled_type, pointer); | ||
| 165 | } else { | ||
| 166 | return ctx.OpLoad(def.sampled_type, def.id); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& index) { | ||
| 171 | if (!index.IsImmediate() || index.U32() != 0) { | ||
| 172 | throw NotImplementedException("Indirect image indexing"); | ||
| 173 | } | ||
| 174 | if (info.type == TextureType::Buffer) { | ||
| 175 | const TextureBufferDefinition& def{ctx.texture_buffers.at(info.descriptor_index)}; | ||
| 176 | if (def.count > 1) { | ||
| 177 | throw NotImplementedException("Indirect texture sample"); | ||
| 178 | } | ||
| 179 | const Id sampler_id{def.id}; | ||
| 180 | const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; | ||
| 181 | return ctx.OpImage(ctx.image_buffer_type, id); | ||
| 182 | } else { | ||
| 183 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; | ||
| 184 | if (def.count > 1) { | ||
| 185 | throw NotImplementedException("Indirect texture sample"); | ||
| 186 | } | ||
| 187 | return ctx.OpImage(def.image_type, ctx.OpLoad(def.sampled_type, def.id)); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { | ||
| 192 | if (!index.IsImmediate() || index.U32() != 0) { | ||
| 193 | throw NotImplementedException("Indirect image indexing"); | ||
| 194 | } | ||
| 195 | if (info.type == TextureType::Buffer) { | ||
| 196 | const ImageBufferDefinition def{ctx.image_buffers.at(info.descriptor_index)}; | ||
| 197 | return ctx.OpLoad(def.image_type, def.id); | ||
| 198 | } else { | ||
| 199 | const ImageDefinition def{ctx.images.at(info.descriptor_index)}; | ||
| 200 | return ctx.OpLoad(def.image_type, def.id); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { | ||
| 205 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 206 | if (info.relaxed_precision != 0) { | ||
| 207 | ctx.Decorate(sample, spv::Decoration::RelaxedPrecision); | ||
| 208 | } | ||
| 209 | return sample; | ||
| 210 | } | ||
| 211 | |||
| 212 | template <typename MethodPtrType, typename... Args> | ||
| 213 | Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, | ||
| 214 | Id result_type, Args&&... args) { | ||
| 215 | IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 216 | if (!sparse) { | ||
| 217 | return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...)); | ||
| 218 | } | ||
| 219 | const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; | ||
| 220 | const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; | ||
| 221 | const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; | ||
| 222 | sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); | ||
| 223 | sparse->Invalidate(); | ||
| 224 | Decorate(ctx, inst, sample); | ||
| 225 | return ctx.OpCompositeExtract(result_type, sample, 1U); | ||
| 226 | } | ||
| 227 | } // Anonymous namespace | ||
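The Emit wrapper above handles sparse residency queries: a sparse image op returns a struct of (residency code, texel); the wrapper peels the texel off for the caller and routes the residency code to the GetSparseFromOp pseudo-instruction. A host-side model of the split (shapes assumed for illustration):

    #include <cstdint>
    #include <utility>

    struct SparseResult {
        uint32_t residency_code; // fed to OpImageSparseTexelsResident
        float texel[4];          // what the sampling caller actually wants
    };

    static std::pair<uint32_t, const float*> SplitSparse(const SparseResult& result) {
        return {result.residency_code, result.texel};
    }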
| 228 | |||
| 229 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 230 | throw LogicError("Unreachable instruction"); | ||
| 231 | } | ||
| 232 | |||
| 233 | Id EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 234 | throw LogicError("Unreachable instruction"); | ||
| 235 | } | ||
| 236 | |||
| 237 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 238 | throw LogicError("Unreachable instruction"); | ||
| 239 | } | ||
| 240 | |||
| 241 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 242 | throw LogicError("Unreachable instruction"); | ||
| 243 | } | ||
| 244 | |||
| 245 | Id EmitBindlessImageGather(EmitContext&) { | ||
| 246 | throw LogicError("Unreachable instruction"); | ||
| 247 | } | ||
| 248 | |||
| 249 | Id EmitBindlessImageGatherDref(EmitContext&) { | ||
| 250 | throw LogicError("Unreachable instruction"); | ||
| 251 | } | ||
| 252 | |||
| 253 | Id EmitBindlessImageFetch(EmitContext&) { | ||
| 254 | throw LogicError("Unreachable instruction"); | ||
| 255 | } | ||
| 256 | |||
| 257 | Id EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 258 | throw LogicError("Unreachable instruction"); | ||
| 259 | } | ||
| 260 | |||
| 261 | Id EmitBindlessImageQueryLod(EmitContext&) { | ||
| 262 | throw LogicError("Unreachable instruction"); | ||
| 263 | } | ||
| 264 | |||
| 265 | Id EmitBindlessImageGradient(EmitContext&) { | ||
| 266 | throw LogicError("Unreachable instruction"); | ||
| 267 | } | ||
| 268 | |||
| 269 | Id EmitBindlessImageRead(EmitContext&) { | ||
| 270 | throw LogicError("Unreachable instruction"); | ||
| 271 | } | ||
| 272 | |||
| 273 | Id EmitBindlessImageWrite(EmitContext&) { | ||
| 274 | throw LogicError("Unreachable instruction"); | ||
| 275 | } | ||
| 276 | |||
| 277 | Id EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 278 | throw LogicError("Unreachable instruction"); | ||
| 279 | } | ||
| 280 | |||
| 281 | Id EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 282 | throw LogicError("Unreachable instruction"); | ||
| 283 | } | ||
| 284 | |||
| 285 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 286 | throw LogicError("Unreachable instruction"); | ||
| 287 | } | ||
| 288 | |||
| 289 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 290 | throw LogicError("Unreachable instruction"); | ||
| 291 | } | ||
| 292 | |||
| 293 | Id EmitBoundImageGather(EmitContext&) { | ||
| 294 | throw LogicError("Unreachable instruction"); | ||
| 295 | } | ||
| 296 | |||
| 297 | Id EmitBoundImageGatherDref(EmitContext&) { | ||
| 298 | throw LogicError("Unreachable instruction"); | ||
| 299 | } | ||
| 300 | |||
| 301 | Id EmitBoundImageFetch(EmitContext&) { | ||
| 302 | throw LogicError("Unreachable instruction"); | ||
| 303 | } | ||
| 304 | |||
| 305 | Id EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 306 | throw LogicError("Unreachable instruction"); | ||
| 307 | } | ||
| 308 | |||
| 309 | Id EmitBoundImageQueryLod(EmitContext&) { | ||
| 310 | throw LogicError("Unreachable instruction"); | ||
| 311 | } | ||
| 312 | |||
| 313 | Id EmitBoundImageGradient(EmitContext&) { | ||
| 314 | throw LogicError("Unreachable instruction"); | ||
| 315 | } | ||
| 316 | |||
| 317 | Id EmitBoundImageRead(EmitContext&) { | ||
| 318 | throw LogicError("Unreachable instruction"); | ||
| 319 | } | ||
| 320 | |||
| 321 | Id EmitBoundImageWrite(EmitContext&) { | ||
| 322 | throw LogicError("Unreachable instruction"); | ||
| 323 | } | ||
| 324 | |||
| 325 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 326 | Id bias_lc, const IR::Value& offset) { | ||
| 327 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 328 | if (ctx.stage == Stage::Fragment) { | ||
| 329 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, | ||
| 330 | bias_lc, offset); | ||
| 331 | return Emit(&EmitContext::OpImageSparseSampleImplicitLod, | ||
| 332 | &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], | ||
| 333 | Texture(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); | ||
| 334 | } else { | ||
| 335 | // SPIR-V does not allow implicit LODs on non-fragment stages. Maxwell hardware behaves as | ||
| 336 | // if the LOD were explicitly zero. This may change on Turing with implicit compute | ||
| 337 | // derivatives. | ||
| 338 | const Id lod{ctx.Const(0.0f)}; | ||
| 339 | const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset); | ||
| 340 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 341 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 342 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 343 | } | ||
| 344 | } | ||
| 345 | |||
| 346 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 347 | Id lod, const IR::Value& offset) { | ||
| 348 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 349 | const ImageOperands operands(ctx, false, true, false, lod, offset); | ||
| 350 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 351 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 352 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 353 | } | ||
| 354 | |||
| 355 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 356 | Id coords, Id dref, Id bias_lc, const IR::Value& offset) { | ||
| 357 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 358 | const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc, | ||
| 359 | offset); | ||
| 360 | return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod, | ||
| 361 | &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1], | ||
| 362 | Texture(ctx, info, index), coords, dref, operands.MaskOptional(), operands.Span()); | ||
| 363 | } | ||
| 364 | |||
| 365 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 366 | Id coords, Id dref, Id lod, const IR::Value& offset) { | ||
| 367 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 368 | const ImageOperands operands(ctx, false, true, false, lod, offset); | ||
| 369 | return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod, | ||
| 370 | &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1], | ||
| 371 | Texture(ctx, info, index), coords, dref, operands.Mask(), operands.Span()); | ||
| 372 | } | ||
| 373 | |||
| 374 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 375 | const IR::Value& offset, const IR::Value& offset2) { | ||
| 376 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 377 | const ImageOperands operands(ctx, offset, offset2); | ||
| 378 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | ||
| 379 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), | ||
| 380 | operands.MaskOptional(), operands.Span()); | ||
| 381 | } | ||
| 382 | |||
| 383 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 384 | const IR::Value& offset, const IR::Value& offset2, Id dref) { | ||
| 385 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 386 | const ImageOperands operands(ctx, offset, offset2); | ||
| 387 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | ||
| 388 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), | ||
| 389 | operands.Span()); | ||
| 390 | } | ||
| 391 | |||
| 392 | Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 393 | Id lod, Id ms) { | ||
| 394 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 395 | if (info.type == TextureType::Buffer) { | ||
| 396 | lod = Id{}; | ||
| 397 | } | ||
| 398 | const ImageOperands operands(offset, lod, ms); | ||
| 399 | return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], | ||
| 400 | TextureImage(ctx, info, index), coords, operands.MaskOptional(), operands.Span()); | ||
| 401 | } | ||
| 402 | |||
| 403 | Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { | ||
| 404 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 405 | const Id image{TextureImage(ctx, info, index)}; | ||
| 406 | const Id zero{ctx.u32_zero_value}; | ||
| 407 | const auto mips{[&] { return ctx.OpImageQueryLevels(ctx.U32[1], image); }}; | ||
| 408 | switch (info.type) { | ||
| 409 | case TextureType::Color1D: | ||
| 410 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[1], image, lod), | ||
| 411 | zero, zero, mips()); | ||
| 412 | case TextureType::ColorArray1D: | ||
| 413 | case TextureType::Color2D: | ||
| 414 | case TextureType::ColorCube: | ||
| 415 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[2], image, lod), | ||
| 416 | zero, mips()); | ||
| 417 | case TextureType::ColorArray2D: | ||
| 418 | case TextureType::Color3D: | ||
| 419 | case TextureType::ColorArrayCube: | ||
| 420 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySizeLod(ctx.U32[3], image, lod), | ||
| 421 | mips()); | ||
| 422 | case TextureType::Buffer: | ||
| 423 | return ctx.OpCompositeConstruct(ctx.U32[4], ctx.OpImageQuerySize(ctx.U32[1], image), zero, | ||
| 424 | zero, mips()); | ||
| 425 | } | ||
| 426 | throw LogicError("Unspecified image type {}", info.type.Value()); | ||
| 427 | } | ||
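The query packing above gives the IR a fixed 4-component shape regardless of dimensionality: size components first, zero padding, then the mip count. For example, the Color2D case reduces to:

    #include <array>
    #include <cstdint>

    static std::array<uint32_t, 4> PackQuery2D(uint32_t width, uint32_t height,
                                               uint32_t mip_count) {
        return {width, height, 0u, mip_count}; // (size.xy, pad, levels)
    }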
| 428 | |||
| 429 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { | ||
| 430 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 431 | const Id zero{ctx.f32_zero_value}; | ||
| 432 | const Id sampler{Texture(ctx, info, index)}; | ||
| 433 | return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), | ||
| 434 | zero, zero); | ||
| 435 | } | ||
| 436 | |||
| 437 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 438 | Id derivates, Id offset, Id lod_clamp) { | ||
| 439 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 440 | const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, | ||
| 441 | offset, lod_clamp); | ||
| 442 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | ||
| 443 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | ||
| 444 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | ||
| 445 | } | ||
| 446 | |||
| 447 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) { | ||
| 448 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 449 | if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) { | ||
| 450 | LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host"); | ||
| 451 | return ctx.ConstantNull(ctx.U32[4]); | ||
| 452 | } | ||
| 453 | return Emit(&EmitContext::OpImageSparseRead, &EmitContext::OpImageRead, ctx, inst, ctx.U32[4], | ||
| 454 | Image(ctx, index, info), coords, std::nullopt, std::span<const Id>{}); | ||
| 455 | } | ||
| 456 | |||
| 457 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) { | ||
| 458 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 459 | ctx.OpImageWrite(Image(ctx, index, info), coords, color); | ||
| 460 | } | ||
| 461 | |||
| 462 | } // namespace Shader::Backend::SPIRV | ||
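Editor's note: every sampling and fetch emitter in the file above routes through an Emit helper (defined earlier in the same file) that picks the sparse or non-sparse Sirit builder depending on whether the instruction has a sparse-residency consumer. The snippet below is a minimal, self-contained sketch of that pointer-to-member dispatch pattern only; Module, Dispatch, and both Op* names are hypothetical stand-ins, not the emitter's real types.

    #include <cstdio>

    struct Module {
        // Hypothetical stand-ins for the two Sirit instruction builders.
        int OpSample(int coords) { return coords + 1; }
        int OpSparseSample(int coords) { return coords + 2; }
    };

    // Forward one argument list to exactly one of two member functions,
    // mirroring calls such as Emit(&EmitContext::OpImageSparseFetch,
    // &EmitContext::OpImageFetch, ...).
    template <typename... Args>
    int Dispatch(bool wants_sparse, int (Module::*sparse)(Args...),
                 int (Module::*regular)(Args...), Module& module, Args... args) {
        int (Module::*chosen)(Args...){wants_sparse ? sparse : regular};
        return (module.*chosen)(args...);
    }

    int main() {
        Module module;
        // Sparse consumer present: the sparse builder runs.
        std::printf("%d\n", Dispatch(true, &Module::OpSparseSample, &Module::OpSample, module, 10));
        // No sparse consumer: the plain builder runs.
        std::printf("%d\n", Dispatch(false, &Module::OpSparseSample, &Module::OpSample, module, 10));
    }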
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp new file mode 100644 index 000000000..d7f1a365a --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp | |||
| @@ -0,0 +1,183 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::SPIRV { | ||
| 10 | namespace { | ||
| 11 | Id Image(EmitContext& ctx, const IR::Value& index, IR::TextureInstInfo info) { | ||
| 12 | if (!index.IsImmediate()) { | ||
| 13 | throw NotImplementedException("Indirect image indexing"); | ||
| 14 | } | ||
| 15 | if (info.type == TextureType::Buffer) { | ||
| 16 | const ImageBufferDefinition def{ctx.image_buffers.at(index.U32())}; | ||
| 17 | return def.id; | ||
| 18 | } else { | ||
| 19 | const ImageDefinition def{ctx.images.at(index.U32())}; | ||
| 20 | return def.id; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { | ||
| 25 | const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))}; | ||
| 26 | const Id semantics{ctx.u32_zero_value}; | ||
| 27 | return {scope, semantics}; | ||
| 28 | } | ||
| 29 | |||
| 30 | Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, | ||
| 31 | Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { | ||
| 32 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||
| 33 | const Id image{Image(ctx, index, info)}; | ||
| 34 | const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))}; | ||
| 35 | const auto [scope, semantics]{AtomicArgs(ctx)}; | ||
| 36 | return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value); | ||
| 37 | } | ||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 40 | Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 41 | Id value) { | ||
| 42 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicIAdd); | ||
| 43 | } | ||
| 44 | |||
| 45 | Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 46 | Id value) { | ||
| 47 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMin); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 51 | Id value) { | ||
| 52 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMin); | ||
| 53 | } | ||
| 54 | |||
| 55 | Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 56 | Id value) { | ||
| 57 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicSMax); | ||
| 58 | } | ||
| 59 | |||
| 60 | Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 61 | Id value) { | ||
| 62 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicUMax); | ||
| 63 | } | ||
| 64 | |||
| 65 | Id EmitImageAtomicInc32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { | ||
| 66 | // TODO: This is not yet implemented | ||
| 67 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 68 | } | ||
| 69 | |||
| 70 | Id EmitImageAtomicDec32(EmitContext&, IR::Inst*, const IR::Value&, Id, Id) { | ||
| 71 | // TODO: This is not yet implemented | ||
| 72 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 73 | } | ||
| 74 | |||
| 75 | Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 76 | Id value) { | ||
| 77 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicAnd); | ||
| 78 | } | ||
| 79 | |||
| 80 | Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 81 | Id value) { | ||
| 82 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicOr); | ||
| 83 | } | ||
| 84 | |||
| 85 | Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 86 | Id value) { | ||
| 87 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicXor); | ||
| 88 | } | ||
| 89 | |||
| 90 | Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 91 | Id value) { | ||
| 92 | return ImageAtomicU32(ctx, inst, index, coords, value, &Sirit::Module::OpAtomicExchange); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 96 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 100 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 104 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 108 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 112 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 116 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 120 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 124 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 128 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 129 | } | ||
| 130 | |||
| 131 | Id EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 132 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 133 | } | ||
| 134 | |||
| 135 | Id EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 136 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 137 | } | ||
| 138 | |||
| 139 | Id EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 140 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 144 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 145 | } | ||
| 146 | |||
| 147 | Id EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 148 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 149 | } | ||
| 150 | |||
| 151 | Id EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 152 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 153 | } | ||
| 154 | |||
| 155 | Id EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 156 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 157 | } | ||
| 158 | |||
| 159 | Id EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 160 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 161 | } | ||
| 162 | |||
| 163 | Id EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 164 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 165 | } | ||
| 166 | |||
| 167 | Id EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 168 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 172 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 173 | } | ||
| 174 | |||
| 175 | Id EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 176 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 177 | } | ||
| 178 | |||
| 179 | Id EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 180 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 181 | } | ||
| 182 | |||
| 183 | } // namespace Shader::Backend::SPIRV | ||
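Editor's note: ImageAtomicU32 above always pairs spv::Scope::Device with semantics 0 (Relaxed), so the texel update is atomic but imposes no ordering on surrounding memory accesses. As a conceptual reference only, and assuming a relaxed CPU atomic is a fair analogue, the same contract expressed with std::atomic looks like this:

    #include <atomic>
    #include <cstdio>

    int main() {
        std::atomic<unsigned> texel{5};
        // Atomic read-modify-write with no ordering guarantees, matching the
        // Relaxed semantics that AtomicArgs encodes for the SPIR-V atomics.
        const unsigned previous{texel.fetch_add(3u, std::memory_order_relaxed)};
        std::printf("previous=%u now=%u\n", previous, texel.load()); // previous=5 now=8
    }

Ordering, when the guest needs it, comes from the separate barrier emitters (EmitBarrier and friends declared in emit_spirv_instructions.h) rather than from the atomics themselves.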
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h new file mode 100644 index 000000000..f99c02848 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | |||
| @@ -0,0 +1,579 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | #pragma once | ||
| 5 | #include <sirit/sirit.h> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | enum class Attribute : u64; | ||
| 11 | enum class Patch : u64; | ||
| 12 | class Inst; | ||
| 13 | class Value; | ||
| 14 | } // namespace Shader::IR | ||
| 15 | |||
| 16 | namespace Shader::Backend::SPIRV { | ||
| 17 | |||
| 18 | using Sirit::Id; | ||
| 19 | |||
| 20 | class EmitContext; | ||
| 21 | |||
| 22 | // Microinstruction emitters | ||
| 23 | Id EmitPhi(EmitContext& ctx, IR::Inst* inst); | ||
| 24 | void EmitVoid(EmitContext& ctx); | ||
| 25 | Id EmitIdentity(EmitContext& ctx, const IR::Value& value); | ||
| 26 | Id EmitConditionRef(EmitContext& ctx, const IR::Value& value); | ||
| 27 | void EmitReference(EmitContext&); | ||
| 28 | void EmitPhiMove(EmitContext&); | ||
| 29 | void EmitJoin(EmitContext& ctx); | ||
| 30 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 31 | void EmitBarrier(EmitContext& ctx); | ||
| 32 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 33 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 34 | void EmitPrologue(EmitContext& ctx); | ||
| 35 | void EmitEpilogue(EmitContext& ctx); | ||
| 36 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); | ||
| 37 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 38 | void EmitGetRegister(EmitContext& ctx); | ||
| 39 | void EmitSetRegister(EmitContext& ctx); | ||
| 40 | void EmitGetPred(EmitContext& ctx); | ||
| 41 | void EmitSetPred(EmitContext& ctx); | ||
| 42 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 43 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 44 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 45 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 46 | Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 47 | Id EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 48 | Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 49 | Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 50 | Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 51 | Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 52 | Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 53 | Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); | ||
| 54 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); | ||
| 55 | Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); | ||
| 56 | void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); | ||
| 57 | Id EmitGetPatch(EmitContext& ctx, IR::Patch patch); | ||
| 58 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value); | ||
| 59 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); | ||
| 60 | void EmitSetSampleMask(EmitContext& ctx, Id value); | ||
| 61 | void EmitSetFragDepth(EmitContext& ctx, Id value); | ||
| 62 | void EmitGetZFlag(EmitContext& ctx); | ||
| 63 | void EmitGetSFlag(EmitContext& ctx); | ||
| 64 | void EmitGetCFlag(EmitContext& ctx); | ||
| 65 | void EmitGetOFlag(EmitContext& ctx); | ||
| 66 | void EmitSetZFlag(EmitContext& ctx); | ||
| 67 | void EmitSetSFlag(EmitContext& ctx); | ||
| 68 | void EmitSetCFlag(EmitContext& ctx); | ||
| 69 | void EmitSetOFlag(EmitContext& ctx); | ||
| 70 | Id EmitWorkgroupId(EmitContext& ctx); | ||
| 71 | Id EmitLocalInvocationId(EmitContext& ctx); | ||
| 72 | Id EmitInvocationId(EmitContext& ctx); | ||
| 73 | Id EmitSampleId(EmitContext& ctx); | ||
| 74 | Id EmitIsHelperInvocation(EmitContext& ctx); | ||
| 75 | Id EmitYDirection(EmitContext& ctx); | ||
| 76 | Id EmitLoadLocal(EmitContext& ctx, Id word_offset); | ||
| 77 | void EmitWriteLocal(EmitContext& ctx, Id word_offset, Id value); | ||
| 78 | Id EmitUndefU1(EmitContext& ctx); | ||
| 79 | Id EmitUndefU8(EmitContext& ctx); | ||
| 80 | Id EmitUndefU16(EmitContext& ctx); | ||
| 81 | Id EmitUndefU32(EmitContext& ctx); | ||
| 82 | Id EmitUndefU64(EmitContext& ctx); | ||
| 83 | void EmitLoadGlobalU8(EmitContext& ctx); | ||
| 84 | void EmitLoadGlobalS8(EmitContext& ctx); | ||
| 85 | void EmitLoadGlobalU16(EmitContext& ctx); | ||
| 86 | void EmitLoadGlobalS16(EmitContext& ctx); | ||
| 87 | Id EmitLoadGlobal32(EmitContext& ctx, Id address); | ||
| 88 | Id EmitLoadGlobal64(EmitContext& ctx, Id address); | ||
| 89 | Id EmitLoadGlobal128(EmitContext& ctx, Id address); | ||
| 90 | void EmitWriteGlobalU8(EmitContext& ctx); | ||
| 91 | void EmitWriteGlobalS8(EmitContext& ctx); | ||
| 92 | void EmitWriteGlobalU16(EmitContext& ctx); | ||
| 93 | void EmitWriteGlobalS16(EmitContext& ctx); | ||
| 94 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); | ||
| 95 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); | ||
| 96 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); | ||
| 97 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 98 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 99 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 100 | Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 101 | Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 102 | Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 103 | Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||
| 104 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 105 | Id value); | ||
| 106 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 107 | Id value); | ||
| 108 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 109 | Id value); | ||
| 110 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 111 | Id value); | ||
| 112 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 113 | Id value); | ||
| 114 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 115 | Id value); | ||
| 116 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 117 | Id value); | ||
| 118 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset); | ||
| 119 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset); | ||
| 120 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset); | ||
| 121 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset); | ||
| 122 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset); | ||
| 123 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset); | ||
| 124 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset); | ||
| 125 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value); | ||
| 126 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value); | ||
| 127 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value); | ||
| 128 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value); | ||
| 129 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value); | ||
| 130 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | ||
| 131 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 132 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 133 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | ||
| 134 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); | ||
| 135 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); | ||
| 136 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 137 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 138 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 139 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); | ||
| 140 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 141 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 142 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | ||
| 143 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); | ||
| 144 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); | ||
| 145 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 146 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 147 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 148 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); | ||
| 149 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 150 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 151 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | ||
| 152 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); | ||
| 153 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); | ||
| 154 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 155 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 156 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 157 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 158 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 159 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 160 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 161 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 162 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 163 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 164 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 165 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 166 | Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 167 | Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 168 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 169 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 170 | Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 171 | Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 172 | Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 173 | Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value); | ||
| 174 | void EmitBitCastU16F16(EmitContext& ctx); | ||
| 175 | Id EmitBitCastU32F32(EmitContext& ctx, Id value); | ||
| 176 | void EmitBitCastU64F64(EmitContext& ctx); | ||
| 177 | void EmitBitCastF16U16(EmitContext& ctx); | ||
| 178 | Id EmitBitCastF32U32(EmitContext& ctx, Id value); | ||
| 179 | void EmitBitCastF64U64(EmitContext& ctx); | ||
| 180 | Id EmitPackUint2x32(EmitContext& ctx, Id value); | ||
| 181 | Id EmitUnpackUint2x32(EmitContext& ctx, Id value); | ||
| 182 | Id EmitPackFloat2x16(EmitContext& ctx, Id value); | ||
| 183 | Id EmitUnpackFloat2x16(EmitContext& ctx, Id value); | ||
| 184 | Id EmitPackHalf2x16(EmitContext& ctx, Id value); | ||
| 185 | Id EmitUnpackHalf2x16(EmitContext& ctx, Id value); | ||
| 186 | Id EmitPackDouble2x32(EmitContext& ctx, Id value); | ||
| 187 | Id EmitUnpackDouble2x32(EmitContext& ctx, Id value); | ||
| 188 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 189 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 190 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 191 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 192 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 193 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 194 | Id EmitFPAbs16(EmitContext& ctx, Id value); | ||
| 195 | Id EmitFPAbs32(EmitContext& ctx, Id value); | ||
| 196 | Id EmitFPAbs64(EmitContext& ctx, Id value); | ||
| 197 | Id EmitFPAdd16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 198 | Id EmitFPAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 199 | Id EmitFPAdd64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 200 | Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 201 | Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 202 | Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); | ||
| 203 | Id EmitFPMax32(EmitContext& ctx, Id a, Id b); | ||
| 204 | Id EmitFPMax64(EmitContext& ctx, Id a, Id b); | ||
| 205 | Id EmitFPMin32(EmitContext& ctx, Id a, Id b); | ||
| 206 | Id EmitFPMin64(EmitContext& ctx, Id a, Id b); | ||
| 207 | Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 208 | Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 209 | Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 210 | Id EmitFPNeg16(EmitContext& ctx, Id value); | ||
| 211 | Id EmitFPNeg32(EmitContext& ctx, Id value); | ||
| 212 | Id EmitFPNeg64(EmitContext& ctx, Id value); | ||
| 213 | Id EmitFPSin(EmitContext& ctx, Id value); | ||
| 214 | Id EmitFPCos(EmitContext& ctx, Id value); | ||
| 215 | Id EmitFPExp2(EmitContext& ctx, Id value); | ||
| 216 | Id EmitFPLog2(EmitContext& ctx, Id value); | ||
| 217 | Id EmitFPRecip32(EmitContext& ctx, Id value); | ||
| 218 | Id EmitFPRecip64(EmitContext& ctx, Id value); | ||
| 219 | Id EmitFPRecipSqrt32(EmitContext& ctx, Id value); | ||
| 220 | Id EmitFPRecipSqrt64(EmitContext& ctx, Id value); | ||
| 221 | Id EmitFPSqrt(EmitContext& ctx, Id value); | ||
| 222 | Id EmitFPSaturate16(EmitContext& ctx, Id value); | ||
| 223 | Id EmitFPSaturate32(EmitContext& ctx, Id value); | ||
| 224 | Id EmitFPSaturate64(EmitContext& ctx, Id value); | ||
| 225 | Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 226 | Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 227 | Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value); | ||
| 228 | Id EmitFPRoundEven16(EmitContext& ctx, Id value); | ||
| 229 | Id EmitFPRoundEven32(EmitContext& ctx, Id value); | ||
| 230 | Id EmitFPRoundEven64(EmitContext& ctx, Id value); | ||
| 231 | Id EmitFPFloor16(EmitContext& ctx, Id value); | ||
| 232 | Id EmitFPFloor32(EmitContext& ctx, Id value); | ||
| 233 | Id EmitFPFloor64(EmitContext& ctx, Id value); | ||
| 234 | Id EmitFPCeil16(EmitContext& ctx, Id value); | ||
| 235 | Id EmitFPCeil32(EmitContext& ctx, Id value); | ||
| 236 | Id EmitFPCeil64(EmitContext& ctx, Id value); | ||
| 237 | Id EmitFPTrunc16(EmitContext& ctx, Id value); | ||
| 238 | Id EmitFPTrunc32(EmitContext& ctx, Id value); | ||
| 239 | Id EmitFPTrunc64(EmitContext& ctx, Id value); | ||
| 240 | Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 241 | Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 242 | Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 243 | Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 244 | Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 245 | Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 246 | Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 247 | Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 248 | Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 249 | Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 250 | Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 251 | Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 252 | Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 253 | Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 254 | Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 255 | Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 256 | Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 257 | Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 258 | Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 259 | Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 260 | Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 261 | Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 262 | Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 263 | Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 264 | Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 265 | Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 266 | Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 267 | Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 268 | Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 269 | Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 270 | Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 271 | Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 272 | Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 273 | Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs); | ||
| 274 | Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs); | ||
| 275 | Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs); | ||
| 276 | Id EmitFPIsNan16(EmitContext& ctx, Id value); | ||
| 277 | Id EmitFPIsNan32(EmitContext& ctx, Id value); | ||
| 278 | Id EmitFPIsNan64(EmitContext& ctx, Id value); | ||
| 279 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 280 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b); | ||
| 281 | Id EmitISub32(EmitContext& ctx, Id a, Id b); | ||
| 282 | Id EmitISub64(EmitContext& ctx, Id a, Id b); | ||
| 283 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); | ||
| 284 | Id EmitINeg32(EmitContext& ctx, Id value); | ||
| 285 | Id EmitINeg64(EmitContext& ctx, Id value); | ||
| 286 | Id EmitIAbs32(EmitContext& ctx, Id value); | ||
| 287 | Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift); | ||
| 288 | Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift); | ||
| 289 | Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift); | ||
| 290 | Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift); | ||
| 291 | Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift); | ||
| 292 | Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift); | ||
| 293 | Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 294 | Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 295 | Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||
| 296 | Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count); | ||
| 297 | Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); | ||
| 298 | Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count); | ||
| 299 | Id EmitBitReverse32(EmitContext& ctx, Id value); | ||
| 300 | Id EmitBitCount32(EmitContext& ctx, Id value); | ||
| 301 | Id EmitBitwiseNot32(EmitContext& ctx, Id value); | ||
| 302 | Id EmitFindSMsb32(EmitContext& ctx, Id value); | ||
| 303 | Id EmitFindUMsb32(EmitContext& ctx, Id value); | ||
| 304 | Id EmitSMin32(EmitContext& ctx, Id a, Id b); | ||
| 305 | Id EmitUMin32(EmitContext& ctx, Id a, Id b); | ||
| 306 | Id EmitSMax32(EmitContext& ctx, Id a, Id b); | ||
| 307 | Id EmitUMax32(EmitContext& ctx, Id a, Id b); | ||
| 308 | Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); | ||
| 309 | Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); | ||
| 310 | Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 311 | Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 312 | Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 313 | Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 314 | Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 315 | Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 316 | Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||
| 317 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 318 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 319 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||
| 320 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 321 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 322 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 323 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 324 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 325 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 326 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 327 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 328 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 329 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 330 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 331 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 332 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 333 | Id value); | ||
| 334 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 335 | Id value); | ||
| 336 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 337 | Id value); | ||
| 338 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 339 | Id value); | ||
| 340 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 341 | Id value); | ||
| 342 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 343 | Id value); | ||
| 344 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 345 | Id value); | ||
| 346 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 347 | Id value); | ||
| 348 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 349 | Id value); | ||
| 350 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 351 | Id value); | ||
| 352 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 353 | Id value); | ||
| 354 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 355 | Id value); | ||
| 356 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 357 | Id value); | ||
| 358 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 359 | Id value); | ||
| 360 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 361 | Id value); | ||
| 362 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 363 | Id value); | ||
| 364 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 365 | Id value); | ||
| 366 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 367 | Id value); | ||
| 368 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 369 | Id value); | ||
| 370 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 371 | Id value); | ||
| 372 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 373 | Id value); | ||
| 374 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 375 | Id value); | ||
| 376 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 377 | Id value); | ||
| 378 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 379 | Id value); | ||
| 380 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 381 | Id value); | ||
| 382 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 383 | Id value); | ||
| 384 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 385 | Id value); | ||
| 386 | Id EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 387 | Id EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 388 | Id EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 389 | Id EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 390 | Id EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 391 | Id EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 392 | Id EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 393 | Id EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 394 | Id EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 395 | Id EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 396 | Id EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 397 | Id EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 398 | Id EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 399 | Id EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 400 | Id EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 401 | Id EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 402 | Id EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 403 | Id EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 404 | Id EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 405 | Id EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 406 | Id EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 407 | Id EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 408 | Id EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 409 | Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 410 | Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 411 | Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 412 | Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 413 | Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 414 | Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 415 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); | ||
| 416 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); | ||
| 417 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); | ||
| 418 | Id EmitLogicalNot(EmitContext& ctx, Id value); | ||
| 419 | Id EmitConvertS16F16(EmitContext& ctx, Id value); | ||
| 420 | Id EmitConvertS16F32(EmitContext& ctx, Id value); | ||
| 421 | Id EmitConvertS16F64(EmitContext& ctx, Id value); | ||
| 422 | Id EmitConvertS32F16(EmitContext& ctx, Id value); | ||
| 423 | Id EmitConvertS32F32(EmitContext& ctx, Id value); | ||
| 424 | Id EmitConvertS32F64(EmitContext& ctx, Id value); | ||
| 425 | Id EmitConvertS64F16(EmitContext& ctx, Id value); | ||
| 426 | Id EmitConvertS64F32(EmitContext& ctx, Id value); | ||
| 427 | Id EmitConvertS64F64(EmitContext& ctx, Id value); | ||
| 428 | Id EmitConvertU16F16(EmitContext& ctx, Id value); | ||
| 429 | Id EmitConvertU16F32(EmitContext& ctx, Id value); | ||
| 430 | Id EmitConvertU16F64(EmitContext& ctx, Id value); | ||
| 431 | Id EmitConvertU32F16(EmitContext& ctx, Id value); | ||
| 432 | Id EmitConvertU32F32(EmitContext& ctx, Id value); | ||
| 433 | Id EmitConvertU32F64(EmitContext& ctx, Id value); | ||
| 434 | Id EmitConvertU64F16(EmitContext& ctx, Id value); | ||
| 435 | Id EmitConvertU64F32(EmitContext& ctx, Id value); | ||
| 436 | Id EmitConvertU64F64(EmitContext& ctx, Id value); | ||
| 437 | Id EmitConvertU64U32(EmitContext& ctx, Id value); | ||
| 438 | Id EmitConvertU32U64(EmitContext& ctx, Id value); | ||
| 439 | Id EmitConvertF16F32(EmitContext& ctx, Id value); | ||
| 440 | Id EmitConvertF32F16(EmitContext& ctx, Id value); | ||
| 441 | Id EmitConvertF32F64(EmitContext& ctx, Id value); | ||
| 442 | Id EmitConvertF64F32(EmitContext& ctx, Id value); | ||
| 443 | Id EmitConvertF16S8(EmitContext& ctx, Id value); | ||
| 444 | Id EmitConvertF16S16(EmitContext& ctx, Id value); | ||
| 445 | Id EmitConvertF16S32(EmitContext& ctx, Id value); | ||
| 446 | Id EmitConvertF16S64(EmitContext& ctx, Id value); | ||
| 447 | Id EmitConvertF16U8(EmitContext& ctx, Id value); | ||
| 448 | Id EmitConvertF16U16(EmitContext& ctx, Id value); | ||
| 449 | Id EmitConvertF16U32(EmitContext& ctx, Id value); | ||
| 450 | Id EmitConvertF16U64(EmitContext& ctx, Id value); | ||
| 451 | Id EmitConvertF32S8(EmitContext& ctx, Id value); | ||
| 452 | Id EmitConvertF32S16(EmitContext& ctx, Id value); | ||
| 453 | Id EmitConvertF32S32(EmitContext& ctx, Id value); | ||
| 454 | Id EmitConvertF32S64(EmitContext& ctx, Id value); | ||
| 455 | Id EmitConvertF32U8(EmitContext& ctx, Id value); | ||
| 456 | Id EmitConvertF32U16(EmitContext& ctx, Id value); | ||
| 457 | Id EmitConvertF32U32(EmitContext& ctx, Id value); | ||
| 458 | Id EmitConvertF32U64(EmitContext& ctx, Id value); | ||
| 459 | Id EmitConvertF64S8(EmitContext& ctx, Id value); | ||
| 460 | Id EmitConvertF64S16(EmitContext& ctx, Id value); | ||
| 461 | Id EmitConvertF64S32(EmitContext& ctx, Id value); | ||
| 462 | Id EmitConvertF64S64(EmitContext& ctx, Id value); | ||
| 463 | Id EmitConvertF64U8(EmitContext& ctx, Id value); | ||
| 464 | Id EmitConvertF64U16(EmitContext& ctx, Id value); | ||
| 465 | Id EmitConvertF64U32(EmitContext& ctx, Id value); | ||
| 466 | Id EmitConvertF64U64(EmitContext& ctx, Id value); | ||
| 467 | Id EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 468 | Id EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 469 | Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 470 | Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 471 | Id EmitBindlessImageGather(EmitContext&); | ||
| 472 | Id EmitBindlessImageGatherDref(EmitContext&); | ||
| 473 | Id EmitBindlessImageFetch(EmitContext&); | ||
| 474 | Id EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 475 | Id EmitBindlessImageQueryLod(EmitContext&); | ||
| 476 | Id EmitBindlessImageGradient(EmitContext&); | ||
| 477 | Id EmitBindlessImageRead(EmitContext&); | ||
| 478 | Id EmitBindlessImageWrite(EmitContext&); | ||
| 479 | Id EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 480 | Id EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 481 | Id EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 482 | Id EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 483 | Id EmitBoundImageGather(EmitContext&); | ||
| 484 | Id EmitBoundImageGatherDref(EmitContext&); | ||
| 485 | Id EmitBoundImageFetch(EmitContext&); | ||
| 486 | Id EmitBoundImageQueryDimensions(EmitContext&); | ||
| 487 | Id EmitBoundImageQueryLod(EmitContext&); | ||
| 488 | Id EmitBoundImageGradient(EmitContext&); | ||
| 489 | Id EmitBoundImageRead(EmitContext&); | ||
| 490 | Id EmitBoundImageWrite(EmitContext&); | ||
| 491 | Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 492 | Id bias_lc, const IR::Value& offset); | ||
| 493 | Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 494 | Id lod, const IR::Value& offset); | ||
| 495 | Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 496 | Id coords, Id dref, Id bias_lc, const IR::Value& offset); | ||
| 497 | Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | ||
| 498 | Id coords, Id dref, Id lod, const IR::Value& offset); | ||
| 499 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 500 | const IR::Value& offset, const IR::Value& offset2); | ||
| 501 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 502 | const IR::Value& offset, const IR::Value& offset2, Id dref); | ||
| 503 | Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | ||
| 504 | Id lod, Id ms); | ||
| 505 | Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod); | ||
| 506 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | ||
| 507 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 508 | Id derivates, Id offset, Id lod_clamp); | ||
| 509 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | ||
| 510 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); | ||
| 511 | Id EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 512 | Id EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 513 | Id EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 514 | Id EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 515 | Id EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 516 | Id EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 517 | Id EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 518 | Id EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 519 | Id EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 520 | Id EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 521 | Id EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 522 | Id EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 523 | Id EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 524 | Id EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 525 | Id EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 526 | Id EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 527 | Id EmitBoundImageAtomicInc32(EmitContext&); | ||
| 528 | Id EmitBoundImageAtomicDec32(EmitContext&); | ||
| 529 | Id EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 530 | Id EmitBoundImageAtomicOr32(EmitContext&); | ||
| 531 | Id EmitBoundImageAtomicXor32(EmitContext&); | ||
| 532 | Id EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 533 | Id EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 534 | Id value); | ||
| 535 | Id EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 536 | Id value); | ||
| 537 | Id EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 538 | Id value); | ||
| 539 | Id EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 540 | Id value); | ||
| 541 | Id EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 542 | Id value); | ||
| 543 | Id EmitImageAtomicInc32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 544 | Id value); | ||
| 545 | Id EmitImageAtomicDec32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 546 | Id value); | ||
| 547 | Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 548 | Id value); | ||
| 549 | Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 550 | Id value); | ||
| 551 | Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 552 | Id value); | ||
| 553 | Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | ||
| 554 | Id value); | ||
| 555 | Id EmitLaneId(EmitContext& ctx); | ||
| 556 | Id EmitVoteAll(EmitContext& ctx, Id pred); | ||
| 557 | Id EmitVoteAny(EmitContext& ctx, Id pred); | ||
| 558 | Id EmitVoteEqual(EmitContext& ctx, Id pred); | ||
| 559 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | ||
| 560 | Id EmitSubgroupEqMask(EmitContext& ctx); | ||
| 561 | Id EmitSubgroupLtMask(EmitContext& ctx); | ||
| 562 | Id EmitSubgroupLeMask(EmitContext& ctx); | ||
| 563 | Id EmitSubgroupGtMask(EmitContext& ctx); | ||
| 564 | Id EmitSubgroupGeMask(EmitContext& ctx); | ||
| 565 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 566 | Id segmentation_mask); | ||
| 567 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 568 | Id segmentation_mask); | ||
| 569 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 570 | Id segmentation_mask); | ||
| 571 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 572 | Id segmentation_mask); | ||
| 573 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); | ||
| 574 | Id EmitDPdxFine(EmitContext& ctx, Id op_a); | ||
| 575 | Id EmitDPdyFine(EmitContext& ctx, Id op_a); | ||
| 576 | Id EmitDPdxCoarse(EmitContext& ctx, Id op_a); | ||
| 577 | Id EmitDPdyCoarse(EmitContext& ctx, Id op_a); | ||
| 578 | |||
| 579 | } // namespace Shader::Backend::SPIRV | ||
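Editor's note: this header is a flat catalogue with one free function per IR opcode; the companion emit_spirv.cpp is expected to map opcodes onto these entry points. The miniature below sketches that opcode-to-emitter shape under invented names (sketch::Opcode, sketch::Context, sketch::Invoke) and is not the recompiler's actual dispatcher.

    #include <cstdio>

    namespace sketch {
    enum class Opcode { IAdd32, BitwiseAnd32 };

    struct Context {};

    int EmitIAdd32(Context&, int a, int b) { return a + b; }
    int EmitBitwiseAnd32(Context&, int a, int b) { return a & b; }

    // Route each opcode to its emitter, the way a backend pairs an IR
    // instruction stream with the declarations in this header.
    int Invoke(Context& ctx, Opcode op, int a, int b) {
        switch (op) {
        case Opcode::IAdd32:
            return EmitIAdd32(ctx, a, b);
        case Opcode::BitwiseAnd32:
            return EmitBitwiseAnd32(ctx, a, b);
        }
        return 0;
    }
    } // namespace sketch

    int main() {
        sketch::Context ctx;
        std::printf("%d\n", sketch::Invoke(ctx, sketch::Opcode::IAdd32, 6, 3));       // 9
        std::printf("%d\n", sketch::Invoke(ctx, sketch::Opcode::BitwiseAnd32, 6, 3)); // 2
    }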
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp new file mode 100644 index 000000000..3501d7495 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | |||
| @@ -0,0 +1,270 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | #include <limits> | ||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) { | ||
| 11 | IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)}; | ||
| 12 | if (!zero) { | ||
| 13 | return; | ||
| 14 | } | ||
| 15 | zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value)); | ||
| 16 | zero->Invalidate(); | ||
| 17 | } | ||
| 18 | |||
| 19 | void SetSignFlag(EmitContext& ctx, IR::Inst* inst, Id result) { | ||
| 20 | IR::Inst* const sign{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)}; | ||
| 21 | if (!sign) { | ||
| 22 | return; | ||
| 23 | } | ||
| 24 | sign->SetDefinition(ctx.OpSLessThan(ctx.U1, result, ctx.u32_zero_value)); | ||
| 25 | sign->Invalidate(); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 30 | Id result{}; | ||
| 31 | if (IR::Inst* const carry{inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp)}) { | ||
| 32 | const Id carry_type{ctx.TypeStruct(ctx.U32[1], ctx.U32[1])}; | ||
| 33 | const Id carry_result{ctx.OpIAddCarry(carry_type, a, b)}; | ||
| 34 | result = ctx.OpCompositeExtract(ctx.U32[1], carry_result, 0U); | ||
| 35 | |||
| 36 | const Id carry_value{ctx.OpCompositeExtract(ctx.U32[1], carry_result, 1U)}; | ||
| 37 | carry->SetDefinition(ctx.OpINotEqual(ctx.U1, carry_value, ctx.u32_zero_value)); | ||
| 38 | carry->Invalidate(); | ||
| 39 | } else { | ||
| 40 | result = ctx.OpIAdd(ctx.U32[1], a, b); | ||
| 41 | } | ||
| 42 | SetZeroFlag(ctx, inst, result); | ||
| 43 | SetSignFlag(ctx, inst, result); | ||
| 44 | if (IR::Inst* const overflow{inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)}) { | ||
| 45 | // https://stackoverflow.com/questions/55468823/how-to-detect-integer-overflow-in-c | ||
| 46 | constexpr u32 s32_max{static_cast<u32>(std::numeric_limits<s32>::max())}; | ||
| 47 | const Id is_positive{ctx.OpSGreaterThanEqual(ctx.U1, a, ctx.u32_zero_value)}; | ||
| 48 | const Id sub_a{ctx.OpISub(ctx.U32[1], ctx.Const(s32_max), a)}; | ||
| 49 | |||
| 50 | const Id positive_test{ctx.OpSGreaterThan(ctx.U1, b, sub_a)}; | ||
| 51 | const Id negative_test{ctx.OpSLessThan(ctx.U1, b, sub_a)}; | ||
| 52 | const Id overflow_flag{ctx.OpSelect(ctx.U1, is_positive, positive_test, negative_test)}; | ||
| 53 | overflow->SetDefinition(overflow_flag); | ||
| 54 | overflow->Invalidate(); | ||
| 55 | } | ||
| 56 | return result; | ||
| 57 | } | ||
| 58 | |||
| 59 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { | ||
| 60 | return ctx.OpIAdd(ctx.U64, a, b); | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitISub32(EmitContext& ctx, Id a, Id b) { | ||
| 64 | return ctx.OpISub(ctx.U32[1], a, b); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitISub64(EmitContext& ctx, Id a, Id b) { | ||
| 68 | return ctx.OpISub(ctx.U64, a, b); | ||
| 69 | } | ||
| 70 | |||
| 71 | Id EmitIMul32(EmitContext& ctx, Id a, Id b) { | ||
| 72 | return ctx.OpIMul(ctx.U32[1], a, b); | ||
| 73 | } | ||
| 74 | |||
| 75 | Id EmitINeg32(EmitContext& ctx, Id value) { | ||
| 76 | return ctx.OpSNegate(ctx.U32[1], value); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitINeg64(EmitContext& ctx, Id value) { | ||
| 80 | return ctx.OpSNegate(ctx.U64, value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitIAbs32(EmitContext& ctx, Id value) { | ||
| 84 | return ctx.OpSAbs(ctx.U32[1], value); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitShiftLeftLogical32(EmitContext& ctx, Id base, Id shift) { | ||
| 88 | return ctx.OpShiftLeftLogical(ctx.U32[1], base, shift); | ||
| 89 | } | ||
| 90 | |||
| 91 | Id EmitShiftLeftLogical64(EmitContext& ctx, Id base, Id shift) { | ||
| 92 | return ctx.OpShiftLeftLogical(ctx.U64, base, shift); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitShiftRightLogical32(EmitContext& ctx, Id base, Id shift) { | ||
| 96 | return ctx.OpShiftRightLogical(ctx.U32[1], base, shift); | ||
| 97 | } | ||
| 98 | |||
| 99 | Id EmitShiftRightLogical64(EmitContext& ctx, Id base, Id shift) { | ||
| 100 | return ctx.OpShiftRightLogical(ctx.U64, base, shift); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitShiftRightArithmetic32(EmitContext& ctx, Id base, Id shift) { | ||
| 104 | return ctx.OpShiftRightArithmetic(ctx.U32[1], base, shift); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitShiftRightArithmetic64(EmitContext& ctx, Id base, Id shift) { | ||
| 108 | return ctx.OpShiftRightArithmetic(ctx.U64, base, shift); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitBitwiseAnd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 112 | const Id result{ctx.OpBitwiseAnd(ctx.U32[1], a, b)}; | ||
| 113 | SetZeroFlag(ctx, inst, result); | ||
| 114 | SetSignFlag(ctx, inst, result); | ||
| 115 | return result; | ||
| 116 | } | ||
| 117 | |||
| 118 | Id EmitBitwiseOr32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 119 | const Id result{ctx.OpBitwiseOr(ctx.U32[1], a, b)}; | ||
| 120 | SetZeroFlag(ctx, inst, result); | ||
| 121 | SetSignFlag(ctx, inst, result); | ||
| 122 | return result; | ||
| 123 | } | ||
| 124 | |||
| 125 | Id EmitBitwiseXor32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||
| 126 | const Id result{ctx.OpBitwiseXor(ctx.U32[1], a, b)}; | ||
| 127 | SetZeroFlag(ctx, inst, result); | ||
| 128 | SetSignFlag(ctx, inst, result); | ||
| 129 | return result; | ||
| 130 | } | ||
| 131 | |||
| 132 | Id EmitBitFieldInsert(EmitContext& ctx, Id base, Id insert, Id offset, Id count) { | ||
| 133 | return ctx.OpBitFieldInsert(ctx.U32[1], base, insert, offset, count); | ||
| 134 | } | ||
| 135 | |||
| 136 | Id EmitBitFieldSExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { | ||
| 137 | const Id result{ctx.OpBitFieldSExtract(ctx.U32[1], base, offset, count)}; | ||
| 138 | SetZeroFlag(ctx, inst, result); | ||
| 139 | SetSignFlag(ctx, inst, result); | ||
| 140 | return result; | ||
| 141 | } | ||
| 142 | |||
| 143 | Id EmitBitFieldUExtract(EmitContext& ctx, IR::Inst* inst, Id base, Id offset, Id count) { | ||
| 144 | const Id result{ctx.OpBitFieldUExtract(ctx.U32[1], base, offset, count)}; | ||
| 145 | SetZeroFlag(ctx, inst, result); | ||
| 146 | SetSignFlag(ctx, inst, result); | ||
| 147 | return result; | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitBitReverse32(EmitContext& ctx, Id value) { | ||
| 151 | return ctx.OpBitReverse(ctx.U32[1], value); | ||
| 152 | } | ||
| 153 | |||
| 154 | Id EmitBitCount32(EmitContext& ctx, Id value) { | ||
| 155 | return ctx.OpBitCount(ctx.U32[1], value); | ||
| 156 | } | ||
| 157 | |||
| 158 | Id EmitBitwiseNot32(EmitContext& ctx, Id value) { | ||
| 159 | return ctx.OpNot(ctx.U32[1], value); | ||
| 160 | } | ||
| 161 | |||
| 162 | Id EmitFindSMsb32(EmitContext& ctx, Id value) { | ||
| 163 | return ctx.OpFindSMsb(ctx.U32[1], value); | ||
| 164 | } | ||
| 165 | |||
| 166 | Id EmitFindUMsb32(EmitContext& ctx, Id value) { | ||
| 167 | return ctx.OpFindUMsb(ctx.U32[1], value); | ||
| 168 | } | ||
| 169 | |||
| 170 | Id EmitSMin32(EmitContext& ctx, Id a, Id b) { | ||
| 171 | const bool is_broken{ctx.profile.has_broken_signed_operations}; | ||
| 172 | if (is_broken) { | ||
| 173 | a = ctx.OpBitcast(ctx.S32[1], a); | ||
| 174 | b = ctx.OpBitcast(ctx.S32[1], b); | ||
| 175 | } | ||
| 176 | const Id result{ctx.OpSMin(ctx.U32[1], a, b)}; | ||
| 177 | return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; | ||
| 178 | } | ||
| 179 | |||
| 180 | Id EmitUMin32(EmitContext& ctx, Id a, Id b) { | ||
| 181 | return ctx.OpUMin(ctx.U32[1], a, b); | ||
| 182 | } | ||
| 183 | |||
| 184 | Id EmitSMax32(EmitContext& ctx, Id a, Id b) { | ||
| 185 | const bool is_broken{ctx.profile.has_broken_signed_operations}; | ||
| 186 | if (is_broken) { | ||
| 187 | a = ctx.OpBitcast(ctx.S32[1], a); | ||
| 188 | b = ctx.OpBitcast(ctx.S32[1], b); | ||
| 189 | } | ||
| 190 | const Id result{ctx.OpSMax(ctx.U32[1], a, b)}; | ||
| 191 | return is_broken ? ctx.OpBitcast(ctx.U32[1], result) : result; | ||
| 192 | } | ||
| 193 | |||
| 194 | Id EmitUMax32(EmitContext& ctx, Id a, Id b) { | ||
| 195 | return ctx.OpUMax(ctx.U32[1], a, b); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { | ||
| 199 | Id result{}; | ||
| 200 | if (ctx.profile.has_broken_signed_operations || ctx.profile.has_broken_spirv_clamp) { | ||
| 201 | value = ctx.OpBitcast(ctx.S32[1], value); | ||
| 202 | min = ctx.OpBitcast(ctx.S32[1], min); | ||
| 203 | max = ctx.OpBitcast(ctx.S32[1], max); | ||
| 204 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 205 | result = ctx.OpSMax(ctx.S32[1], ctx.OpSMin(ctx.S32[1], value, max), min); | ||
| 206 | } else { | ||
| 207 | result = ctx.OpSClamp(ctx.S32[1], value, min, max); | ||
| 208 | } | ||
| 209 | result = ctx.OpBitcast(ctx.U32[1], result); | ||
| 210 | } else { | ||
| 211 | result = ctx.OpSClamp(ctx.U32[1], value, min, max); | ||
| 212 | } | ||
| 213 | SetZeroFlag(ctx, inst, result); | ||
| 214 | SetSignFlag(ctx, inst, result); | ||
| 215 | return result; | ||
| 216 | } | ||
| 217 | |||
| 218 | Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { | ||
| 219 | Id result{}; | ||
| 220 | if (ctx.profile.has_broken_spirv_clamp) { | ||
| 221 | result = ctx.OpUMax(ctx.U32[1], ctx.OpUMin(ctx.U32[1], value, max), min); | ||
| 222 | } else { | ||
| 223 | result = ctx.OpUClamp(ctx.U32[1], value, min, max); | ||
| 224 | } | ||
| 225 | SetZeroFlag(ctx, inst, result); | ||
| 226 | SetSignFlag(ctx, inst, result); | ||
| 227 | return result; | ||
| 228 | } | ||
| 229 | |||
| 230 | Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 231 | return ctx.OpSLessThan(ctx.U1, lhs, rhs); | ||
| 232 | } | ||
| 233 | |||
| 234 | Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 235 | return ctx.OpULessThan(ctx.U1, lhs, rhs); | ||
| 236 | } | ||
| 237 | |||
| 238 | Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 239 | return ctx.OpIEqual(ctx.U1, lhs, rhs); | ||
| 240 | } | ||
| 241 | |||
| 242 | Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 243 | return ctx.OpSLessThanEqual(ctx.U1, lhs, rhs); | ||
| 244 | } | ||
| 245 | |||
| 246 | Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 247 | return ctx.OpULessThanEqual(ctx.U1, lhs, rhs); | ||
| 248 | } | ||
| 249 | |||
| 250 | Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 251 | return ctx.OpSGreaterThan(ctx.U1, lhs, rhs); | ||
| 252 | } | ||
| 253 | |||
| 254 | Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 255 | return ctx.OpUGreaterThan(ctx.U1, lhs, rhs); | ||
| 256 | } | ||
| 257 | |||
| 258 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 259 | return ctx.OpINotEqual(ctx.U1, lhs, rhs); | ||
| 260 | } | ||
| 261 | |||
| 262 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 263 | return ctx.OpSGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 264 | } | ||
| 265 | |||
| 266 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs) { | ||
| 267 | return ctx.OpUGreaterThanEqual(ctx.U1, lhs, rhs); | ||
| 268 | } | ||
| 269 | |||
| 270 | } // namespace Shader::Backend::SPIRV | ||
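Note on the SetZeroFlag/SetSignFlag calls above: condition codes are serviced lazily, so a comparison is only emitted when a later IR instruction actually consumes the flag. The helpers are defined earlier in this file; a minimal sketch of their likely shape, mirroring the SetInBoundsFlag pattern that appears verbatim in emit_spirv_warp.cpp further down (the body here is an assumption, not copied from this diff):

    // Sketch: bind the zero-flag pseudo-op, if any consumer exists, to "result == 0".
    void SetZeroFlag(EmitContext& ctx, IR::Inst* inst, Id result) {
        IR::Inst* const zero{inst->GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)};
        if (!zero) {
            return; // Nothing reads the zero flag, so no extra comparison is emitted.
        }
        zero->SetDefinition(ctx.OpIEqual(ctx.U1, result, ctx.u32_zero_value));
        zero->Invalidate();
    }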
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp new file mode 100644 index 000000000..b9a9500fc --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b) { | ||
| 11 | return ctx.OpLogicalOr(ctx.U1, a, b); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b) { | ||
| 15 | return ctx.OpLogicalAnd(ctx.U1, a, b); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b) { | ||
| 19 | return ctx.OpLogicalNotEqual(ctx.U1, a, b); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitLogicalNot(EmitContext& ctx, Id value) { | ||
| 23 | return ctx.OpLogicalNot(ctx.U1, value); | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Backend::SPIRV | ||
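SPIR-V defines no OpLogicalXor, so EmitLogicalXor lowers to OpLogicalNotEqual; for booleans the two are the same function. A compile-time check of the identity:

    // For booleans, XOR coincides with "not equal"; verified exhaustively.
    constexpr bool Xor(bool a, bool b) {
        return a != b;
    }
    static_assert(!Xor(false, false) && !Xor(true, true));
    static_assert(Xor(true, false) && Xor(false, true));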
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp new file mode 100644 index 000000000..679ee2684 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | |||
| @@ -0,0 +1,275 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <bit> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 8 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::SPIRV { | ||
| 11 | namespace { | ||
| 12 | Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size, | ||
| 13 | u32 index_offset = 0) { | ||
| 14 | if (offset.IsImmediate()) { | ||
| 15 | const u32 imm_offset{static_cast<u32>(offset.U32() / element_size) + index_offset}; | ||
| 16 | return ctx.Const(imm_offset); | ||
| 17 | } | ||
| 18 | const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 19 | Id index{ctx.Def(offset)}; | ||
| 20 | if (shift != 0) { | ||
| 21 | const Id shift_id{ctx.Const(shift)}; | ||
| 22 | index = ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||
| 23 | } | ||
| 24 | if (index_offset != 0) { | ||
| 25 | index = ctx.OpIAdd(ctx.U32[1], index, ctx.Const(index_offset)); | ||
| 26 | } | ||
| 27 | return index; | ||
| 28 | } | ||
| 29 | |||
| 30 | Id StoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 31 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 32 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 33 | if (!binding.IsImmediate()) { | ||
| 34 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 35 | } | ||
| 36 | const Id ssbo{ctx.ssbos[binding.U32()].*member_ptr}; | ||
| 37 | const Id index{StorageIndex(ctx, offset, element_size, index_offset)}; | ||
| 38 | return ctx.OpAccessChain(type_def.element, ssbo, ctx.u32_zero_value, index); | ||
| 39 | } | ||
| 40 | |||
| 41 | Id LoadStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id result_type, | ||
| 42 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 43 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 44 | const Id pointer{ | ||
| 45 | StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; | ||
| 46 | return ctx.OpLoad(result_type, pointer); | ||
| 47 | } | ||
| 48 | |||
| 49 | Id LoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 50 | u32 index_offset = 0) { | ||
| 51 | return LoadStorage(ctx, binding, offset, ctx.U32[1], ctx.storage_types.U32, sizeof(u32), | ||
| 52 | &StorageDefinitions::U32, index_offset); | ||
| 53 | } | ||
| 54 | |||
| 55 | void WriteStorage(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 56 | const StorageTypeDefinition& type_def, size_t element_size, | ||
| 57 | Id StorageDefinitions::*member_ptr, u32 index_offset = 0) { | ||
| 58 | const Id pointer{ | ||
| 59 | StoragePointer(ctx, binding, offset, type_def, element_size, member_ptr, index_offset)}; | ||
| 60 | ctx.OpStore(pointer, value); | ||
| 61 | } | ||
| 62 | |||
| 63 | void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value, | ||
| 64 | u32 index_offset = 0) { | ||
| 65 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), | ||
| 66 | &StorageDefinitions::U32, index_offset); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void EmitLoadGlobalU8(EmitContext&) { | ||
| 71 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 72 | } | ||
| 73 | |||
| 74 | void EmitLoadGlobalS8(EmitContext&) { | ||
| 75 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 76 | } | ||
| 77 | |||
| 78 | void EmitLoadGlobalU16(EmitContext&) { | ||
| 79 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 80 | } | ||
| 81 | |||
| 82 | void EmitLoadGlobalS16(EmitContext&) { | ||
| 83 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 84 | } | ||
| 85 | |||
| 86 | Id EmitLoadGlobal32(EmitContext& ctx, Id address) { | ||
| 87 | if (ctx.profile.support_int64) { | ||
| 88 | return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); | ||
| 89 | } | ||
| 90 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 91 | return ctx.Const(0u); | ||
| 92 | } | ||
| 93 | |||
| 94 | Id EmitLoadGlobal64(EmitContext& ctx, Id address) { | ||
| 95 | if (ctx.profile.support_int64) { | ||
| 96 | return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); | ||
| 97 | } | ||
| 98 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 99 | return ctx.Const(0u, 0u); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitLoadGlobal128(EmitContext& ctx, Id address) { | ||
| 103 | if (ctx.profile.support_int64) { | ||
| 104 | return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); | ||
| 105 | } | ||
| 106 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 107 | return ctx.Const(0u, 0u, 0u, 0u); | ||
| 108 | } | ||
| 109 | |||
| 110 | void EmitWriteGlobalU8(EmitContext&) { | ||
| 111 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitWriteGlobalS8(EmitContext&) { | ||
| 115 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitWriteGlobalU16(EmitContext&) { | ||
| 119 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitWriteGlobalS16(EmitContext&) { | ||
| 123 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { | ||
| 127 | if (ctx.profile.support_int64) { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); | ||
| 129 | return; | ||
| 130 | } | ||
| 131 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { | ||
| 135 | if (ctx.profile.support_int64) { | ||
| 136 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); | ||
| 137 | return; | ||
| 138 | } | ||
| 139 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { | ||
| 143 | if (ctx.profile.support_int64) { | ||
| 144 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | LOG_WARNING(Shader_SPIRV, "Int64 not supported, ignoring memory operation"); | ||
| 148 | } | ||
| 149 | |||
| 150 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 151 | if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { | ||
| 152 | return ctx.OpUConvert(ctx.U32[1], | ||
| 153 | LoadStorage(ctx, binding, offset, ctx.U8, ctx.storage_types.U8, | ||
| 154 | sizeof(u8), &StorageDefinitions::U8)); | ||
| 155 | } else { | ||
| 156 | return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 157 | ctx.BitOffset8(offset), ctx.Const(8u)); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 162 | if (ctx.profile.support_int8 && ctx.profile.support_descriptor_aliasing) { | ||
| 163 | return ctx.OpSConvert(ctx.U32[1], | ||
| 164 | LoadStorage(ctx, binding, offset, ctx.S8, ctx.storage_types.S8, | ||
| 165 | sizeof(s8), &StorageDefinitions::S8)); | ||
| 166 | } else { | ||
| 167 | return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 168 | ctx.BitOffset8(offset), ctx.Const(8u)); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 173 | if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { | ||
| 174 | return ctx.OpUConvert(ctx.U32[1], | ||
| 175 | LoadStorage(ctx, binding, offset, ctx.U16, ctx.storage_types.U16, | ||
| 176 | sizeof(u16), &StorageDefinitions::U16)); | ||
| 177 | } else { | ||
| 178 | return ctx.OpBitFieldUExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 179 | ctx.BitOffset16(offset), ctx.Const(16u)); | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | Id EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 184 | if (ctx.profile.support_int16 && ctx.profile.support_descriptor_aliasing) { | ||
| 185 | return ctx.OpSConvert(ctx.U32[1], | ||
| 186 | LoadStorage(ctx, binding, offset, ctx.S16, ctx.storage_types.S16, | ||
| 187 | sizeof(s16), &StorageDefinitions::S16)); | ||
| 188 | } else { | ||
| 189 | return ctx.OpBitFieldSExtract(ctx.U32[1], LoadStorage32(ctx, binding, offset), | ||
| 190 | ctx.BitOffset16(offset), ctx.Const(16u)); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | Id EmitLoadStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 195 | return LoadStorage32(ctx, binding, offset); | ||
| 196 | } | ||
| 197 | |||
| 198 | Id EmitLoadStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 199 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 200 | return LoadStorage(ctx, binding, offset, ctx.U32[2], ctx.storage_types.U32x2, | ||
| 201 | sizeof(u32[2]), &StorageDefinitions::U32x2); | ||
| 202 | } else { | ||
| 203 | return ctx.OpCompositeConstruct(ctx.U32[2], LoadStorage32(ctx, binding, offset, 0), | ||
| 204 | LoadStorage32(ctx, binding, offset, 1)); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | |||
| 208 | Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||
| 209 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 210 | return LoadStorage(ctx, binding, offset, ctx.U32[4], ctx.storage_types.U32x4, | ||
| 211 | sizeof(u32[4]), &StorageDefinitions::U32x4); | ||
| 212 | } else { | ||
| 213 | return ctx.OpCompositeConstruct(ctx.U32[4], LoadStorage32(ctx, binding, offset, 0), | ||
| 214 | LoadStorage32(ctx, binding, offset, 1), | ||
| 215 | LoadStorage32(ctx, binding, offset, 2), | ||
| 216 | LoadStorage32(ctx, binding, offset, 3)); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 221 | Id value) { | ||
| 222 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, | ||
| 223 | sizeof(u8), &StorageDefinitions::U8); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 227 | Id value) { | ||
| 228 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, | ||
| 229 | sizeof(s8), &StorageDefinitions::S8); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 233 | Id value) { | ||
| 234 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, | ||
| 235 | sizeof(u16), &StorageDefinitions::U16); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 239 | Id value) { | ||
| 240 | WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, | ||
| 241 | sizeof(s16), &StorageDefinitions::S16); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 245 | Id value) { | ||
| 246 | WriteStorage32(ctx, binding, offset, value); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 250 | Id value) { | ||
| 251 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 252 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x2, sizeof(u32[2]), | ||
| 253 | &StorageDefinitions::U32x2); | ||
| 254 | } else { | ||
| 255 | for (u32 index = 0; index < 2; ++index) { | ||
| 256 | const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; | ||
| 257 | WriteStorage32(ctx, binding, offset, element, index); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 263 | Id value) { | ||
| 264 | if (ctx.profile.support_descriptor_aliasing) { | ||
| 265 | WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32x4, sizeof(u32[4]), | ||
| 266 | &StorageDefinitions::U32x4); | ||
| 267 | } else { | ||
| 268 | for (u32 index = 0; index < 4; ++index) { | ||
| 269 | const Id element{ctx.OpCompositeExtract(ctx.U32[1], value, index)}; | ||
| 270 | WriteStorage32(ctx, binding, offset, element, index); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | } | ||
| 274 | |||
| 275 | } // namespace Shader::Backend::SPIRV | ||
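StorageIndex above converts a byte offset into an element index. The element sizes used here are always powers of two, so the division becomes a logical right shift by std::countr_zero(element_size), and immediate offsets fold away entirely. A host-side mirror of that arithmetic (the helper name is illustrative):

    #include <bit>
    #include <cstddef>
    #include <cstdint>

    // Mirrors StorageIndex's offset-to-index computation for power-of-two element sizes.
    constexpr std::uint32_t ElementIndex(std::uint32_t byte_offset, std::size_t element_size) {
        return byte_offset >> std::countr_zero(element_size);
    }
    static_assert(ElementIndex(12, sizeof(std::uint32_t)) == 3); // byte 12 / 4 == index 3
    static_assert(ElementIndex(16, 8) == 2);                     // byte 16 / 8 == index 2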
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp new file mode 100644 index 000000000..c5b4f4720 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 11 | return ctx.OpSelect(ctx.U1, cond, true_value, false_value); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitSelectU8(EmitContext&, Id, Id, Id) { | ||
| 15 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 19 | return ctx.OpSelect(ctx.U16, cond, true_value, false_value); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 23 | return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 27 | return ctx.OpSelect(ctx.U64, cond, true_value, false_value); | ||
| 28 | } | ||
| 29 | |||
| 30 | Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 31 | return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value); | ||
| 32 | } | ||
| 33 | |||
| 34 | Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 35 | return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitSelectF64(EmitContext& ctx, Id cond, Id true_value, Id false_value) { | ||
| 39 | return ctx.OpSelect(ctx.F64[1], cond, true_value, false_value); | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace Shader::Backend::SPIRV | ||
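Every function above is a thin wrapper over OpSelect, which for these scalar types behaves exactly like the host ternary (component-wise selection only applies to vector operands). A trivial host model:

    // Host model of scalar OpSelect.
    template <typename T>
    constexpr T Select(bool condition, T true_value, T false_value) {
        return condition ? true_value : false_value;
    }
    static_assert(Select(true, 1u, 2u) == 1u && Select(false, 1u, 2u) == 2u);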
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp new file mode 100644 index 000000000..9a79fc7a2 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp | |||
| @@ -0,0 +1,174 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) { | ||
| 11 | const Id shift_id{ctx.Const(shift)}; | ||
| 12 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 13 | return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index); | ||
| 14 | } | ||
| 15 | |||
| 16 | Id Word(EmitContext& ctx, Id offset) { | ||
| 17 | const Id shift_id{ctx.Const(2U)}; | ||
| 18 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 19 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 20 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 21 | } | ||
| 22 | |||
| 23 | std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) { | ||
| 24 | const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Const(3U))}; | ||
| 25 | const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(mask))}; | ||
| 26 | const Id count_id{ctx.Const(count)}; | ||
| 27 | return {bit, count_id}; | ||
| 28 | } | ||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 31 | Id EmitLoadSharedU8(EmitContext& ctx, Id offset) { | ||
| 32 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 33 | const Id pointer{ | ||
| 34 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 35 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 36 | } else { | ||
| 37 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 38 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | Id EmitLoadSharedS8(EmitContext& ctx, Id offset) { | ||
| 43 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 44 | const Id pointer{ | ||
| 45 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 46 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer)); | ||
| 47 | } else { | ||
| 48 | const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)}; | ||
| 49 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | Id EmitLoadSharedU16(EmitContext& ctx, Id offset) { | ||
| 54 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 55 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 56 | return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 57 | } else { | ||
| 58 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 59 | return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | Id EmitLoadSharedS16(EmitContext& ctx, Id offset) { | ||
| 64 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 65 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 66 | return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer)); | ||
| 67 | } else { | ||
| 68 | const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)}; | ||
| 69 | return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | Id EmitLoadSharedU32(EmitContext& ctx, Id offset) { | ||
| 74 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 75 | const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)}; | ||
| 76 | return ctx.OpLoad(ctx.U32[1], pointer); | ||
| 77 | } else { | ||
| 78 | return Word(ctx, offset); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 | Id EmitLoadSharedU64(EmitContext& ctx, Id offset) { | ||
| 83 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 84 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 85 | return ctx.OpLoad(ctx.U32[2], pointer); | ||
| 86 | } else { | ||
| 87 | const Id shift_id{ctx.Const(2U)}; | ||
| 88 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 89 | const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(1U))}; | ||
| 90 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)}; | ||
| 91 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)}; | ||
| 92 | return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer), | ||
| 93 | ctx.OpLoad(ctx.U32[1], rhs_pointer)); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | Id EmitLoadSharedU128(EmitContext& ctx, Id offset) { | ||
| 98 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 99 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 100 | return ctx.OpLoad(ctx.U32[4], pointer); | ||
| 101 | } | ||
| 102 | const Id shift_id{ctx.Const(2U)}; | ||
| 103 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 104 | std::array<Id, 4> values{}; | ||
| 105 | for (u32 i = 0; i < 4; ++i) { | ||
| 106 | const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; | ||
| 107 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 108 | values[i] = ctx.OpLoad(ctx.U32[1], pointer); | ||
| 109 | } | ||
| 110 | return ctx.OpCompositeConstruct(ctx.U32[4], values); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) { | ||
| 114 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 115 | const Id pointer{ | ||
| 116 | ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)}; | ||
| 117 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value)); | ||
| 118 | } else { | ||
| 119 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) { | ||
| 124 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 125 | const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)}; | ||
| 126 | ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value)); | ||
| 127 | } else { | ||
| 128 | ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) { | ||
| 133 | Id pointer{}; | ||
| 134 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 135 | pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2); | ||
| 136 | } else { | ||
| 137 | const Id shift{ctx.Const(2U)}; | ||
| 138 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 139 | pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset); | ||
| 140 | } | ||
| 141 | ctx.OpStore(pointer, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) { | ||
| 145 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 146 | const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)}; | ||
| 147 | ctx.OpStore(pointer, value); | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | const Id shift{ctx.Const(2U)}; | ||
| 151 | const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 152 | const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Const(1U))}; | ||
| 153 | const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)}; | ||
| 154 | const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)}; | ||
| 155 | ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U)); | ||
| 156 | ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U)); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) { | ||
| 160 | if (ctx.profile.support_explicit_workgroup_layout) { | ||
| 161 | const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)}; | ||
| 162 | ctx.OpStore(pointer, value); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | const Id shift{ctx.Const(2U)}; | ||
| 166 | const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)}; | ||
| 167 | for (u32 i = 0; i < 4; ++i) { | ||
| 168 | const Id index{i == 0 ? base_index : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Const(i))}; | ||
| 169 | const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)}; | ||
| 170 | ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | } // namespace Shader::Backend::SPIRV | ||
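When explicit workgroup layout support is unavailable, shared memory is modeled as a plain u32 array and sub-word accesses are synthesized with bit-field extracts. ExtractArgs turns a byte offset into a bit offset inside the containing 32-bit word: multiply by 8, then mask with 24 for bytes or 16 for halfwords. A compile-time check of that arithmetic (helper names are illustrative):

    #include <cstdint>

    // Bit offset of a byte within its 32-bit word: (offset * 8) & 24 == (offset % 4) * 8.
    constexpr std::uint32_t ByteBitOffset(std::uint32_t byte_offset) {
        return (byte_offset * 8) & 24;
    }
    // Bit offset of an aligned halfword within its word: either 0 or 16.
    constexpr std::uint32_t HalfBitOffset(std::uint32_t byte_offset) {
        return (byte_offset * 8) & 16;
    }
    static_assert(ByteBitOffset(6) == 16); // byte 6 occupies bits [16, 24) of word 1
    static_assert(HalfBitOffset(6) == 16); // halfword at byte 6 occupies bits [16, 32)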
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp new file mode 100644 index 000000000..9e7eb3cb1 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | void ConvertDepthMode(EmitContext& ctx) { | ||
| 11 | const Id type{ctx.F32[1]}; | ||
| 12 | const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; | ||
| 13 | const Id z{ctx.OpCompositeExtract(type, position, 2u)}; | ||
| 14 | const Id w{ctx.OpCompositeExtract(type, position, 3u)}; | ||
| 15 | const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; | ||
| 16 | const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; | ||
| 17 | ctx.OpStore(ctx.output_position, vector); | ||
| 18 | } | ||
| 19 | |||
| 20 | void SetFixedPipelinePointSize(EmitContext& ctx) { | ||
| 21 | if (ctx.runtime_info.fixed_state_point_size) { | ||
| 22 | const float point_size{*ctx.runtime_info.fixed_state_point_size}; | ||
| 23 | ctx.OpStore(ctx.output_point_size, ctx.Const(point_size)); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one, | ||
| 28 | Id default_vector) { | ||
| 29 | switch (num_components) { | ||
| 30 | case 1: | ||
| 31 | return element == 3 ? one : zero; | ||
| 32 | case 2: | ||
| 33 | return ctx.ConstantComposite(ctx.F32[2], zero, element + 1 == 3 ? one : zero); | ||
| 34 | case 3: | ||
| 35 | return ctx.ConstantComposite(ctx.F32[3], zero, zero, element + 2 == 3 ? one : zero); | ||
| 36 | case 4: | ||
| 37 | return default_vector; | ||
| 38 | } | ||
| 39 | throw InvalidArgument("Bad element"); | ||
| 40 | } | ||
| 41 | |||
| 42 | Id ComparisonFunction(EmitContext& ctx, CompareFunction comparison, Id operand_1, Id operand_2) { | ||
| 43 | switch (comparison) { | ||
| 44 | case CompareFunction::Never: | ||
| 45 | return ctx.false_value; | ||
| 46 | case CompareFunction::Less: | ||
| 47 | return ctx.OpFOrdLessThan(ctx.U1, operand_1, operand_2); | ||
| 48 | case CompareFunction::Equal: | ||
| 49 | return ctx.OpFOrdEqual(ctx.U1, operand_1, operand_2); | ||
| 50 | case CompareFunction::LessThanEqual: | ||
| 51 | return ctx.OpFOrdLessThanEqual(ctx.U1, operand_1, operand_2); | ||
| 52 | case CompareFunction::Greater: | ||
| 53 | return ctx.OpFOrdGreaterThan(ctx.U1, operand_1, operand_2); | ||
| 54 | case CompareFunction::NotEqual: | ||
| 55 | return ctx.OpFOrdNotEqual(ctx.U1, operand_1, operand_2); | ||
| 56 | case CompareFunction::GreaterThanEqual: | ||
| 57 | return ctx.OpFOrdGreaterThanEqual(ctx.U1, operand_1, operand_2); | ||
| 58 | case CompareFunction::Always: | ||
| 59 | return ctx.true_value; | ||
| 60 | } | ||
| 61 | throw InvalidArgument("Comparison function {}", comparison); | ||
| 62 | } | ||
| 63 | |||
| 64 | void AlphaTest(EmitContext& ctx) { | ||
| 65 | if (!ctx.runtime_info.alpha_test_func) { | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | const auto comparison{*ctx.runtime_info.alpha_test_func}; | ||
| 69 | if (comparison == CompareFunction::Always) { | ||
| 70 | return; | ||
| 71 | } | ||
| 72 | if (!Sirit::ValidId(ctx.frag_color[0])) { | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | |||
| 76 | const Id type{ctx.F32[1]}; | ||
| 77 | const Id rt0_color{ctx.OpLoad(ctx.F32[4], ctx.frag_color[0])}; | ||
| 78 | const Id alpha{ctx.OpCompositeExtract(type, rt0_color, 3u)}; | ||
| 79 | |||
| 80 | const Id true_label{ctx.OpLabel()}; | ||
| 81 | const Id discard_label{ctx.OpLabel()}; | ||
| 82 | const Id alpha_reference{ctx.Const(ctx.runtime_info.alpha_test_reference)}; | ||
| 83 | const Id condition{ComparisonFunction(ctx, comparison, alpha, alpha_reference)}; | ||
| 84 | |||
| 85 | ctx.OpSelectionMerge(true_label, spv::SelectionControlMask::MaskNone); | ||
| 86 | ctx.OpBranchConditional(condition, true_label, discard_label); | ||
| 87 | ctx.AddLabel(discard_label); | ||
| 88 | ctx.OpKill(); | ||
| 89 | ctx.AddLabel(true_label); | ||
| 90 | } | ||
| 91 | } // Anonymous namespace | ||
| 92 | |||
| 93 | void EmitPrologue(EmitContext& ctx) { | ||
| 94 | if (ctx.stage == Stage::VertexB) { | ||
| 95 | const Id zero{ctx.Const(0.0f)}; | ||
| 96 | const Id one{ctx.Const(1.0f)}; | ||
| 97 | const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)}; | ||
| 98 | ctx.OpStore(ctx.output_position, default_vector); | ||
| 99 | for (const auto& info : ctx.output_generics) { | ||
| 100 | if (info[0].num_components == 0) { | ||
| 101 | continue; | ||
| 102 | } | ||
| 103 | u32 element{0}; | ||
| 104 | while (element < 4) { | ||
| 105 | const auto& element_info{info[element]}; | ||
| 106 | const u32 num{element_info.num_components}; | ||
| 107 | const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)}; | ||
| 108 | ctx.OpStore(element_info.id, value); | ||
| 109 | element += num; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } | ||
| 113 | if (ctx.stage == Stage::VertexB || ctx.stage == Stage::Geometry) { | ||
| 114 | SetFixedPipelinePointSize(ctx); | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitEpilogue(EmitContext& ctx) { | ||
| 119 | if (ctx.stage == Stage::VertexB && ctx.runtime_info.convert_depth_mode) { | ||
| 120 | ConvertDepthMode(ctx); | ||
| 121 | } | ||
| 122 | if (ctx.stage == Stage::Fragment) { | ||
| 123 | AlphaTest(ctx); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||
| 128 | if (ctx.runtime_info.convert_depth_mode) { | ||
| 129 | ConvertDepthMode(ctx); | ||
| 130 | } | ||
| 131 | if (stream.IsImmediate()) { | ||
| 132 | ctx.OpEmitStreamVertex(ctx.Def(stream)); | ||
| 133 | } else { | ||
| 134 | LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); | ||
| 135 | ctx.OpEmitStreamVertex(ctx.u32_zero_value); | ||
| 136 | } | ||
| 137 | // Restore fixed pipeline point size after emitting the vertex | ||
| 138 | SetFixedPipelinePointSize(ctx); | ||
| 139 | } | ||
| 140 | |||
| 141 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 142 | if (stream.IsImmediate()) { | ||
| 143 | ctx.OpEndStreamPrimitive(ctx.Def(stream)); | ||
| 144 | } else { | ||
| 145 | LOG_WARNING(Shader_SPIRV, "Stream is not immediate"); | ||
| 146 | ctx.OpEndStreamPrimitive(ctx.u32_zero_value); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace Shader::Backend::SPIRV | ||
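ConvertDepthMode rewrites the position's z so that OpenGL-style clip depth, where z/w spans [-1, 1], lands in the [0, 1] range expected after rasterization: (z/w + 1)/2 == (z + w)/(2w), so storing (z + w) * 0.5 before the hardware's perspective divide yields the remapped depth. A compile-time check of the identity:

    // After the divide by w, the stored (z + w) * 0.5 equals (z / w + 1) / 2.
    constexpr float DepthAfterDivide(float z, float w) {
        return ((z + w) * 0.5f) / w;
    }
    static_assert(DepthAfterDivide(-2.0f, 2.0f) == 0.0f); // z/w == -1 maps to 0
    static_assert(DepthAfterDivide(2.0f, 2.0f) == 1.0f);  // z/w == +1 maps to 1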
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp new file mode 100644 index 000000000..c9f469e90 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | |||
| 10 | Id EmitUndefU1(EmitContext& ctx) { | ||
| 11 | return ctx.OpUndef(ctx.U1); | ||
| 12 | } | ||
| 13 | |||
| 14 | Id EmitUndefU8(EmitContext&) { | ||
| 15 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 16 | } | ||
| 17 | |||
| 18 | Id EmitUndefU16(EmitContext&) { | ||
| 19 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | Id EmitUndefU32(EmitContext& ctx) { | ||
| 23 | return ctx.OpUndef(ctx.U32[1]); | ||
| 24 | } | ||
| 25 | |||
| 26 | Id EmitUndefU64(EmitContext&) { | ||
| 27 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..78b1e1ba7 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -0,0 +1,203 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" | ||
| 7 | |||
| 8 | namespace Shader::Backend::SPIRV { | ||
| 9 | namespace { | ||
| 10 | Id WarpExtract(EmitContext& ctx, Id value) { | ||
| 11 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, ctx.OpShiftRightLogical(ctx.U32[1], thread_id, ctx.Const(5U))); // ballot word = lane / 32 | ||
| 13 | } | ||
| 14 | |||
| 15 | Id LoadMask(EmitContext& ctx, Id mask) { | ||
| 16 | const Id value{ctx.OpLoad(ctx.U32[4], mask)}; | ||
| 17 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 18 | return ctx.OpCompositeExtract(ctx.U32[1], value, 0U); | ||
| 19 | } | ||
| 20 | return WarpExtract(ctx, value); | ||
| 21 | } | ||
| 22 | |||
| 23 | void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||
| 24 | IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 25 | if (!in_bounds) { | ||
| 26 | return; | ||
| 27 | } | ||
| 28 | in_bounds->SetDefinition(result); | ||
| 29 | in_bounds->Invalidate(); | ||
| 30 | } | ||
| 31 | |||
| 32 | Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||
| 33 | return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||
| 34 | } | ||
| 35 | |||
| 36 | Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||
| 37 | return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||
| 38 | ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||
| 39 | } | ||
| 40 | |||
| 41 | Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||
| 42 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 43 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 44 | return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||
| 45 | } | ||
| 46 | |||
| 47 | Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||
| 48 | return ctx.OpSelect(ctx.U32[1], in_range, | ||
| 49 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
| 53 | Id EmitLaneId(EmitContext& ctx) { | ||
| 54 | const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 56 | return id; | ||
| 57 | } | ||
| 58 | return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); | ||
| 59 | } | ||
| 60 | |||
| 61 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 62 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 63 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 64 | } | ||
| 65 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 66 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 67 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 68 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 69 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 70 | } | ||
| 71 | |||
| 72 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 74 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 75 | } | ||
| 76 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 77 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 78 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 79 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 80 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 81 | } | ||
| 82 | |||
| 83 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 85 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 86 | } | ||
| 87 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 88 | const Id active_mask{WarpExtract(ctx, mask_ballot)}; | ||
| 89 | const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 90 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 91 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 92 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 93 | } | ||
| 94 | |||
| 95 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 96 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 97 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 98 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 99 | } | ||
| 100 | return WarpExtract(ctx, ballot); | ||
| 101 | } | ||
| 102 | |||
| 103 | Id EmitSubgroupEqMask(EmitContext& ctx) { | ||
| 104 | return LoadMask(ctx, ctx.subgroup_mask_eq); | ||
| 105 | } | ||
| 106 | |||
| 107 | Id EmitSubgroupLtMask(EmitContext& ctx) { | ||
| 108 | return LoadMask(ctx, ctx.subgroup_mask_lt); | ||
| 109 | } | ||
| 110 | |||
| 111 | Id EmitSubgroupLeMask(EmitContext& ctx) { | ||
| 112 | return LoadMask(ctx, ctx.subgroup_mask_le); | ||
| 113 | } | ||
| 114 | |||
| 115 | Id EmitSubgroupGtMask(EmitContext& ctx) { | ||
| 116 | return LoadMask(ctx, ctx.subgroup_mask_gt); | ||
| 117 | } | ||
| 118 | |||
| 119 | Id EmitSubgroupGeMask(EmitContext& ctx) { | ||
| 120 | return LoadMask(ctx, ctx.subgroup_mask_ge); | ||
| 121 | } | ||
| 122 | |||
| 123 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 124 | Id segmentation_mask) { | ||
| 125 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 127 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 128 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||
| 129 | |||
| 130 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||
| 131 | const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||
| 132 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 133 | |||
| 134 | SetInBoundsFlag(inst, in_range); | ||
| 135 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 136 | } | ||
| 137 | |||
| 138 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 139 | Id segmentation_mask) { | ||
| 140 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 141 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 142 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||
| 143 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 144 | |||
| 145 | SetInBoundsFlag(inst, in_range); | ||
| 146 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 150 | Id segmentation_mask) { | ||
| 151 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 152 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 153 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||
| 154 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 155 | |||
| 156 | SetInBoundsFlag(inst, in_range); | ||
| 157 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 158 | } | ||
| 159 | |||
| 160 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 161 | Id segmentation_mask) { | ||
| 162 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 163 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 164 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||
| 165 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 166 | |||
| 167 | SetInBoundsFlag(inst, in_range); | ||
| 168 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 169 | } | ||
| 170 | |||
| 171 | Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { | ||
| 172 | const Id three{ctx.Const(3U)}; | ||
| 173 | Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 174 | mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||
| 175 | mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U)); | ||
| 176 | mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask); | ||
| 177 | mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three); | ||
| 178 | |||
| 179 | const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)}; | ||
| 180 | const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)}; | ||
| 181 | |||
| 182 | const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)}; | ||
| 183 | const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)}; | ||
| 184 | return ctx.OpFAdd(ctx.F32[1], result_a, result_b); | ||
| 185 | } | ||
| 186 | |||
| 187 | Id EmitDPdxFine(EmitContext& ctx, Id op_a) { | ||
| 188 | return ctx.OpDPdxFine(ctx.F32[1], op_a); | ||
| 189 | } | ||
| 190 | |||
| 191 | Id EmitDPdyFine(EmitContext& ctx, Id op_a) { | ||
| 192 | return ctx.OpDPdyFine(ctx.F32[1], op_a); | ||
| 193 | } | ||
| 194 | |||
| 195 | Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) { | ||
| 196 | return ctx.OpDPdxCoarse(ctx.F32[1], op_a); | ||
| 197 | } | ||
| 198 | |||
| 199 | Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) { | ||
| 200 | return ctx.OpDPdyCoarse(ctx.F32[1], op_a); | ||
| 201 | } | ||
| 202 | |||
| 203 | } // namespace Shader::Backend::SPIRV | ||
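WarpExtract selects the 32-bit word of the 128-bit ballot vector that holds the current lane's bit, hence the lane / 32 (shift by 5) word index. The shuffle emitters reproduce the guest SHFL lane arithmetic: segmentation_mask pins the lane bits that stay fixed (the segment base), clamp bounds the remaining bits, and out-of-range sources fall back to the thread's own value, with the in-bounds predicate forwarded through SetInBoundsFlag. A host-side mirror of the EmitShuffleIndex path, with illustrative names:

    #include <cstdint>

    // Mirrors the min/max/source computation in EmitShuffleIndex.
    constexpr std::uint32_t ShuffleIndexSource(std::uint32_t lane, std::uint32_t index,
                                               std::uint32_t clamp, std::uint32_t seg_mask) {
        const std::uint32_t min_id = lane & seg_mask;              // segment base lane
        const std::uint32_t max_id = min_id | (clamp & ~seg_mask); // last reachable lane
        const std::uint32_t src = (index & ~seg_mask) | min_id;    // requested source lane
        return src <= max_id ? src : lane; // out of range keeps the thread's own value
    }
    static_assert(ShuffleIndexSource(13, 3, 31, 24) == 11); // segment [8, 16): index 3 -> lane 11
    static_assert(ShuffleIndexSource(13, 3, 9, 24) == 13);  // clamp 9 puts lane 11 out of range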
diff --git a/src/shader_recompiler/environment.h b/src/shader_recompiler/environment.h new file mode 100644 index 000000000..8369d0d84 --- /dev/null +++ b/src/shader_recompiler/environment.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/program_header.h" | ||
| 7 | #include "shader_recompiler/shader_info.h" | ||
| 8 | #include "shader_recompiler/stage.h" | ||
| 9 | |||
| 10 | namespace Shader { | ||
| 11 | |||
| 12 | class Environment { | ||
| 13 | public: | ||
| 14 | virtual ~Environment() = default; | ||
| 15 | |||
| 16 | [[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0; | ||
| 17 | |||
| 18 | [[nodiscard]] virtual u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) = 0; | ||
| 19 | |||
| 20 | [[nodiscard]] virtual TextureType ReadTextureType(u32 raw_handle) = 0; | ||
| 21 | |||
| 22 | [[nodiscard]] virtual u32 TextureBoundBuffer() const = 0; | ||
| 23 | |||
| 24 | [[nodiscard]] virtual u32 LocalMemorySize() const = 0; | ||
| 25 | |||
| 26 | [[nodiscard]] virtual u32 SharedMemorySize() const = 0; | ||
| 27 | |||
| 28 | [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | ||
| 29 | |||
| 30 | [[nodiscard]] const ProgramHeader& SPH() const noexcept { | ||
| 31 | return sph; | ||
| 32 | } | ||
| 33 | |||
| 34 | [[nodiscard]] const std::array<u32, 8>& GpPassthroughMask() const noexcept { | ||
| 35 | return gp_passthrough_mask; | ||
| 36 | } | ||
| 37 | |||
| 38 | [[nodiscard]] Stage ShaderStage() const noexcept { | ||
| 39 | return stage; | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] u32 StartAddress() const noexcept { | ||
| 43 | return start_address; | ||
| 44 | } | ||
| 45 | |||
| 46 | protected: | ||
| 47 | ProgramHeader sph{}; | ||
| 48 | std::array<u32, 8> gp_passthrough_mask{}; | ||
| 49 | Stage stage{}; | ||
| 50 | u32 start_address{}; | ||
| 51 | }; | ||
| 52 | |||
| 53 | } // namespace Shader | ||
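Environment is the seam through which the recompiler pulls guest state, one subclass per integration point. A minimal stub, e.g. for driving the frontend in isolation, only needs the pure virtuals; the class name and all return values below are illustrative assumptions, not part of this diff:

    #include "shader_recompiler/environment.h"

    namespace {
    // Hypothetical do-nothing environment; a real one reads guest memory and registers.
    class NullEnvironment final : public Shader::Environment {
    public:
        u64 ReadInstruction(u32) override {
            return 0; // Placeholder; a real environment fetches the word at the address.
        }
        u32 ReadCbufValue(u32, u32) override {
            return 0;
        }
        Shader::TextureType ReadTextureType(u32) override {
            return Shader::TextureType::Color2D; // Assumed enumerator from shader_info.h.
        }
        u32 TextureBoundBuffer() const override {
            return 0;
        }
        u32 LocalMemorySize() const override {
            return 0;
        }
        u32 SharedMemorySize() const override {
            return 0;
        }
        std::array<u32, 3> WorkgroupSize() const override {
            return {1, 1, 1};
        }
    };
    } // Anonymous namespace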
diff --git a/src/shader_recompiler/exception.h b/src/shader_recompiler/exception.h new file mode 100644 index 000000000..337e7f0c8 --- /dev/null +++ b/src/shader_recompiler/exception.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <stdexcept> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include <fmt/format.h> | ||
| 13 | |||
| 14 | namespace Shader { | ||
| 15 | |||
| 16 | class Exception : public std::exception { | ||
| 17 | public: | ||
| 18 | explicit Exception(std::string message) noexcept : err_message{std::move(message)} {} | ||
| 19 | |||
| 20 | const char* what() const noexcept override { | ||
| 21 | return err_message.c_str(); | ||
| 22 | } | ||
| 23 | |||
| 24 | void Prepend(std::string_view prepend) { | ||
| 25 | err_message.insert(0, prepend); | ||
| 26 | } | ||
| 27 | |||
| 28 | void Append(std::string_view append) { | ||
| 29 | err_message += append; | ||
| 30 | } | ||
| 31 | |||
| 32 | private: | ||
| 33 | std::string err_message; | ||
| 34 | }; | ||
| 35 | |||
| 36 | class LogicError : public Exception { | ||
| 37 | public: | ||
| 38 | template <typename... Args> | ||
| 39 | LogicError(const char* message, Args&&... args) | ||
| 40 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 41 | }; | ||
| 42 | |||
| 43 | class RuntimeError : public Exception { | ||
| 44 | public: | ||
| 45 | template <typename... Args> | ||
| 46 | RuntimeError(const char* message, Args&&... args) | ||
| 47 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 48 | }; | ||
| 49 | |||
| 50 | class NotImplementedException : public Exception { | ||
| 51 | public: | ||
| 52 | template <typename... Args> | ||
| 53 | NotImplementedException(const char* message, Args&&... args) | ||
| 54 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} { | ||
| 55 | Append(" is not implemented"); | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | |||
| 59 | class InvalidArgument : public Exception { | ||
| 60 | public: | ||
| 61 | template <typename... Args> | ||
| 62 | InvalidArgument(const char* message, Args&&... args) | ||
| 63 | : Exception{fmt::format(fmt::runtime(message), std::forward<Args>(args)...)} {} | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace Shader | ||
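NotImplementedException appends " is not implemented" to whatever it formats, which is why call sites throughout this diff pass only the subject ("SPIR-V Instruction"). Messages go through fmt, so arguments format as usual; a small usage sketch with an illustrative helper:

    #include "shader_recompiler/exception.h"

    // Throws e.g. "Opcode 0x1a is not implemented"; the suffix comes from the exception.
    [[noreturn]] void ThrowUnimplementedOpcode(unsigned opcode) {
        throw Shader::NotImplementedException("Opcode {:#x}", opcode);
    }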
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h new file mode 100644 index 000000000..b61773487 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | class Block; | ||
| 14 | |||
| 15 | struct AbstractSyntaxNode { | ||
| 16 | enum class Type { | ||
| 17 | Block, | ||
| 18 | If, | ||
| 19 | EndIf, | ||
| 20 | Loop, | ||
| 21 | Repeat, | ||
| 22 | Break, | ||
| 23 | Return, | ||
| 24 | Unreachable, | ||
| 25 | }; | ||
| 26 | union Data { | ||
| 27 | Block* block; | ||
| 28 | struct { | ||
| 29 | U1 cond; | ||
| 30 | Block* body; | ||
| 31 | Block* merge; | ||
| 32 | } if_node; | ||
| 33 | struct { | ||
| 34 | Block* merge; | ||
| 35 | } end_if; | ||
| 36 | struct { | ||
| 37 | Block* body; | ||
| 38 | Block* continue_block; | ||
| 39 | Block* merge; | ||
| 40 | } loop; | ||
| 41 | struct { | ||
| 42 | U1 cond; | ||
| 43 | Block* loop_header; | ||
| 44 | Block* merge; | ||
| 45 | } repeat; | ||
| 46 | struct { | ||
| 47 | U1 cond; | ||
| 48 | Block* merge; | ||
| 49 | Block* skip; | ||
| 50 | } break_node; | ||
| 51 | }; | ||
| 52 | |||
| 53 | Data data{}; | ||
| 54 | Type type{}; | ||
| 55 | }; | ||
| 56 | using AbstractSyntaxList = std::vector<AbstractSyntaxNode>; | ||
| 57 | |||
| 58 | } // namespace Shader::IR | ||
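The abstract syntax list is deliberately flat: structured control flow is encoded as paired tagged nodes in a vector rather than as a tree, which keeps backend traversal a simple forward walk. A sketch of how a single if/endif region might be laid out (assumed usage; the helper and node ordering are illustrative, not taken from this diff):

    #include "shader_recompiler/frontend/ir/abstract_syntax_list.h"

    // Lays out "entry; if (cond) { body } merge" as the flat sequence
    // [Block(entry), If, Block(body), EndIf, Block(merge)].
    Shader::IR::AbstractSyntaxList MakeIfSketch(Shader::IR::U1 cond, Shader::IR::Block* entry,
                                                Shader::IR::Block* body,
                                                Shader::IR::Block* merge) {
        using Node = Shader::IR::AbstractSyntaxNode;
        Shader::IR::AbstractSyntaxList list(5);
        list[0].type = Node::Type::Block;
        list[0].data.block = entry;
        list[1].type = Node::Type::If;
        list[1].data.if_node = {cond, body, merge};
        list[2].type = Node::Type::Block;
        list[2].data.block = body;
        list[3].type = Node::Type::EndIf;
        list[3].data.end_if = {merge};
        list[4].type = Node::Type::Block;
        list[4].data.block = merge;
        return list;
    }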
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp new file mode 100644 index 000000000..4d0b8b8e5 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.cpp | |||
| @@ -0,0 +1,454 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <fmt/format.h> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | bool IsGeneric(Attribute attribute) noexcept { | ||
| 13 | return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31X; | ||
| 14 | } | ||
| 15 | |||
| 16 | u32 GenericAttributeIndex(Attribute attribute) { | ||
| 17 | if (!IsGeneric(attribute)) { | ||
| 18 | throw InvalidArgument("Attribute is not generic {}", attribute); | ||
| 19 | } | ||
| 20 | return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u; | ||
| 21 | } | ||
| 22 | |||
| 23 | u32 GenericAttributeElement(Attribute attribute) { | ||
| 24 | if (!IsGeneric(attribute)) { | ||
| 25 | throw InvalidArgument("Attribute is not generic {}", attribute); | ||
| 26 | } | ||
| 27 | return static_cast<u32>(attribute) % 4; | ||
| 28 | } | ||
| 29 | |||
| 30 | std::string NameOf(Attribute attribute) { | ||
| 31 | switch (attribute) { | ||
| 32 | case Attribute::PrimitiveId: | ||
| 33 | return "PrimitiveId"; | ||
| 34 | case Attribute::Layer: | ||
| 35 | return "Layer"; | ||
| 36 | case Attribute::ViewportIndex: | ||
| 37 | return "ViewportIndex"; | ||
| 38 | case Attribute::PointSize: | ||
| 39 | return "PointSize"; | ||
| 40 | case Attribute::PositionX: | ||
| 41 | return "Position.X"; | ||
| 42 | case Attribute::PositionY: | ||
| 43 | return "Position.Y"; | ||
| 44 | case Attribute::PositionZ: | ||
| 45 | return "Position.Z"; | ||
| 46 | case Attribute::PositionW: | ||
| 47 | return "Position.W"; | ||
| 48 | case Attribute::Generic0X: | ||
| 49 | return "Generic[0].X"; | ||
| 50 | case Attribute::Generic0Y: | ||
| 51 | return "Generic[0].Y"; | ||
| 52 | case Attribute::Generic0Z: | ||
| 53 | return "Generic[0].Z"; | ||
| 54 | case Attribute::Generic0W: | ||
| 55 | return "Generic[0].W"; | ||
| 56 | case Attribute::Generic1X: | ||
| 57 | return "Generic[1].X"; | ||
| 58 | case Attribute::Generic1Y: | ||
| 59 | return "Generic[1].Y"; | ||
| 60 | case Attribute::Generic1Z: | ||
| 61 | return "Generic[1].Z"; | ||
| 62 | case Attribute::Generic1W: | ||
| 63 | return "Generic[1].W"; | ||
| 64 | case Attribute::Generic2X: | ||
| 65 | return "Generic[2].X"; | ||
| 66 | case Attribute::Generic2Y: | ||
| 67 | return "Generic[2].Y"; | ||
| 68 | case Attribute::Generic2Z: | ||
| 69 | return "Generic[2].Z"; | ||
| 70 | case Attribute::Generic2W: | ||
| 71 | return "Generic[2].W"; | ||
| 72 | case Attribute::Generic3X: | ||
| 73 | return "Generic[3].X"; | ||
| 74 | case Attribute::Generic3Y: | ||
| 75 | return "Generic[3].Y"; | ||
| 76 | case Attribute::Generic3Z: | ||
| 77 | return "Generic[3].Z"; | ||
| 78 | case Attribute::Generic3W: | ||
| 79 | return "Generic[3].W"; | ||
| 80 | case Attribute::Generic4X: | ||
| 81 | return "Generic[4].X"; | ||
| 82 | case Attribute::Generic4Y: | ||
| 83 | return "Generic[4].Y"; | ||
| 84 | case Attribute::Generic4Z: | ||
| 85 | return "Generic[4].Z"; | ||
| 86 | case Attribute::Generic4W: | ||
| 87 | return "Generic[4].W"; | ||
| 88 | case Attribute::Generic5X: | ||
| 89 | return "Generic[5].X"; | ||
| 90 | case Attribute::Generic5Y: | ||
| 91 | return "Generic[5].Y"; | ||
| 92 | case Attribute::Generic5Z: | ||
| 93 | return "Generic[5].Z"; | ||
| 94 | case Attribute::Generic5W: | ||
| 95 | return "Generic[5].W"; | ||
| 96 | case Attribute::Generic6X: | ||
| 97 | return "Generic[6].X"; | ||
| 98 | case Attribute::Generic6Y: | ||
| 99 | return "Generic[6].Y"; | ||
| 100 | case Attribute::Generic6Z: | ||
| 101 | return "Generic[6].Z"; | ||
| 102 | case Attribute::Generic6W: | ||
| 103 | return "Generic[6].W"; | ||
| 104 | case Attribute::Generic7X: | ||
| 105 | return "Generic[7].X"; | ||
| 106 | case Attribute::Generic7Y: | ||
| 107 | return "Generic[7].Y"; | ||
| 108 | case Attribute::Generic7Z: | ||
| 109 | return "Generic[7].Z"; | ||
| 110 | case Attribute::Generic7W: | ||
| 111 | return "Generic[7].W"; | ||
| 112 | case Attribute::Generic8X: | ||
| 113 | return "Generic[8].X"; | ||
| 114 | case Attribute::Generic8Y: | ||
| 115 | return "Generic[8].Y"; | ||
| 116 | case Attribute::Generic8Z: | ||
| 117 | return "Generic[8].Z"; | ||
| 118 | case Attribute::Generic8W: | ||
| 119 | return "Generic[8].W"; | ||
| 120 | case Attribute::Generic9X: | ||
| 121 | return "Generic[9].X"; | ||
| 122 | case Attribute::Generic9Y: | ||
| 123 | return "Generic[9].Y"; | ||
| 124 | case Attribute::Generic9Z: | ||
| 125 | return "Generic[9].Z"; | ||
| 126 | case Attribute::Generic9W: | ||
| 127 | return "Generic[9].W"; | ||
| 128 | case Attribute::Generic10X: | ||
| 129 | return "Generic[10].X"; | ||
| 130 | case Attribute::Generic10Y: | ||
| 131 | return "Generic[10].Y"; | ||
| 132 | case Attribute::Generic10Z: | ||
| 133 | return "Generic[10].Z"; | ||
| 134 | case Attribute::Generic10W: | ||
| 135 | return "Generic[10].W"; | ||
| 136 | case Attribute::Generic11X: | ||
| 137 | return "Generic[11].X"; | ||
| 138 | case Attribute::Generic11Y: | ||
| 139 | return "Generic[11].Y"; | ||
| 140 | case Attribute::Generic11Z: | ||
| 141 | return "Generic[11].Z"; | ||
| 142 | case Attribute::Generic11W: | ||
| 143 | return "Generic[11].W"; | ||
| 144 | case Attribute::Generic12X: | ||
| 145 | return "Generic[12].X"; | ||
| 146 | case Attribute::Generic12Y: | ||
| 147 | return "Generic[12].Y"; | ||
| 148 | case Attribute::Generic12Z: | ||
| 149 | return "Generic[12].Z"; | ||
| 150 | case Attribute::Generic12W: | ||
| 151 | return "Generic[12].W"; | ||
| 152 | case Attribute::Generic13X: | ||
| 153 | return "Generic[13].X"; | ||
| 154 | case Attribute::Generic13Y: | ||
| 155 | return "Generic[13].Y"; | ||
| 156 | case Attribute::Generic13Z: | ||
| 157 | return "Generic[13].Z"; | ||
| 158 | case Attribute::Generic13W: | ||
| 159 | return "Generic[13].W"; | ||
| 160 | case Attribute::Generic14X: | ||
| 161 | return "Generic[14].X"; | ||
| 162 | case Attribute::Generic14Y: | ||
| 163 | return "Generic[14].Y"; | ||
| 164 | case Attribute::Generic14Z: | ||
| 165 | return "Generic[14].Z"; | ||
| 166 | case Attribute::Generic14W: | ||
| 167 | return "Generic[14].W"; | ||
| 168 | case Attribute::Generic15X: | ||
| 169 | return "Generic[15].X"; | ||
| 170 | case Attribute::Generic15Y: | ||
| 171 | return "Generic[15].Y"; | ||
| 172 | case Attribute::Generic15Z: | ||
| 173 | return "Generic[15].Z"; | ||
| 174 | case Attribute::Generic15W: | ||
| 175 | return "Generic[15].W"; | ||
| 176 | case Attribute::Generic16X: | ||
| 177 | return "Generic[16].X"; | ||
| 178 | case Attribute::Generic16Y: | ||
| 179 | return "Generic[16].Y"; | ||
| 180 | case Attribute::Generic16Z: | ||
| 181 | return "Generic[16].Z"; | ||
| 182 | case Attribute::Generic16W: | ||
| 183 | return "Generic[16].W"; | ||
| 184 | case Attribute::Generic17X: | ||
| 185 | return "Generic[17].X"; | ||
| 186 | case Attribute::Generic17Y: | ||
| 187 | return "Generic[17].Y"; | ||
| 188 | case Attribute::Generic17Z: | ||
| 189 | return "Generic[17].Z"; | ||
| 190 | case Attribute::Generic17W: | ||
| 191 | return "Generic[17].W"; | ||
| 192 | case Attribute::Generic18X: | ||
| 193 | return "Generic[18].X"; | ||
| 194 | case Attribute::Generic18Y: | ||
| 195 | return "Generic[18].Y"; | ||
| 196 | case Attribute::Generic18Z: | ||
| 197 | return "Generic[18].Z"; | ||
| 198 | case Attribute::Generic18W: | ||
| 199 | return "Generic[18].W"; | ||
| 200 | case Attribute::Generic19X: | ||
| 201 | return "Generic[19].X"; | ||
| 202 | case Attribute::Generic19Y: | ||
| 203 | return "Generic[19].Y"; | ||
| 204 | case Attribute::Generic19Z: | ||
| 205 | return "Generic[19].Z"; | ||
| 206 | case Attribute::Generic19W: | ||
| 207 | return "Generic[19].W"; | ||
| 208 | case Attribute::Generic20X: | ||
| 209 | return "Generic[20].X"; | ||
| 210 | case Attribute::Generic20Y: | ||
| 211 | return "Generic[20].Y"; | ||
| 212 | case Attribute::Generic20Z: | ||
| 213 | return "Generic[20].Z"; | ||
| 214 | case Attribute::Generic20W: | ||
| 215 | return "Generic[20].W"; | ||
| 216 | case Attribute::Generic21X: | ||
| 217 | return "Generic[21].X"; | ||
| 218 | case Attribute::Generic21Y: | ||
| 219 | return "Generic[21].Y"; | ||
| 220 | case Attribute::Generic21Z: | ||
| 221 | return "Generic[21].Z"; | ||
| 222 | case Attribute::Generic21W: | ||
| 223 | return "Generic[21].W"; | ||
| 224 | case Attribute::Generic22X: | ||
| 225 | return "Generic[22].X"; | ||
| 226 | case Attribute::Generic22Y: | ||
| 227 | return "Generic[22].Y"; | ||
| 228 | case Attribute::Generic22Z: | ||
| 229 | return "Generic[22].Z"; | ||
| 230 | case Attribute::Generic22W: | ||
| 231 | return "Generic[22].W"; | ||
| 232 | case Attribute::Generic23X: | ||
| 233 | return "Generic[23].X"; | ||
| 234 | case Attribute::Generic23Y: | ||
| 235 | return "Generic[23].Y"; | ||
| 236 | case Attribute::Generic23Z: | ||
| 237 | return "Generic[23].Z"; | ||
| 238 | case Attribute::Generic23W: | ||
| 239 | return "Generic[23].W"; | ||
| 240 | case Attribute::Generic24X: | ||
| 241 | return "Generic[24].X"; | ||
| 242 | case Attribute::Generic24Y: | ||
| 243 | return "Generic[24].Y"; | ||
| 244 | case Attribute::Generic24Z: | ||
| 245 | return "Generic[24].Z"; | ||
| 246 | case Attribute::Generic24W: | ||
| 247 | return "Generic[24].W"; | ||
| 248 | case Attribute::Generic25X: | ||
| 249 | return "Generic[25].X"; | ||
| 250 | case Attribute::Generic25Y: | ||
| 251 | return "Generic[25].Y"; | ||
| 252 | case Attribute::Generic25Z: | ||
| 253 | return "Generic[25].Z"; | ||
| 254 | case Attribute::Generic25W: | ||
| 255 | return "Generic[25].W"; | ||
| 256 | case Attribute::Generic26X: | ||
| 257 | return "Generic[26].X"; | ||
| 258 | case Attribute::Generic26Y: | ||
| 259 | return "Generic[26].Y"; | ||
| 260 | case Attribute::Generic26Z: | ||
| 261 | return "Generic[26].Z"; | ||
| 262 | case Attribute::Generic26W: | ||
| 263 | return "Generic[26].W"; | ||
| 264 | case Attribute::Generic27X: | ||
| 265 | return "Generic[27].X"; | ||
| 266 | case Attribute::Generic27Y: | ||
| 267 | return "Generic[27].Y"; | ||
| 268 | case Attribute::Generic27Z: | ||
| 269 | return "Generic[27].Z"; | ||
| 270 | case Attribute::Generic27W: | ||
| 271 | return "Generic[27].W"; | ||
| 272 | case Attribute::Generic28X: | ||
| 273 | return "Generic[28].X"; | ||
| 274 | case Attribute::Generic28Y: | ||
| 275 | return "Generic[28].Y"; | ||
| 276 | case Attribute::Generic28Z: | ||
| 277 | return "Generic[28].Z"; | ||
| 278 | case Attribute::Generic28W: | ||
| 279 | return "Generic[28].W"; | ||
| 280 | case Attribute::Generic29X: | ||
| 281 | return "Generic[29].X"; | ||
| 282 | case Attribute::Generic29Y: | ||
| 283 | return "Generic[29].Y"; | ||
| 284 | case Attribute::Generic29Z: | ||
| 285 | return "Generic[29].Z"; | ||
| 286 | case Attribute::Generic29W: | ||
| 287 | return "Generic[29].W"; | ||
| 288 | case Attribute::Generic30X: | ||
| 289 | return "Generic[30].X"; | ||
| 290 | case Attribute::Generic30Y: | ||
| 291 | return "Generic[30].Y"; | ||
| 292 | case Attribute::Generic30Z: | ||
| 293 | return "Generic[30].Z"; | ||
| 294 | case Attribute::Generic30W: | ||
| 295 | return "Generic[30].W"; | ||
| 296 | case Attribute::Generic31X: | ||
| 297 | return "Generic[31].X"; | ||
| 298 | case Attribute::Generic31Y: | ||
| 299 | return "Generic[31].Y"; | ||
| 300 | case Attribute::Generic31Z: | ||
| 301 | return "Generic[31].Z"; | ||
| 302 | case Attribute::Generic31W: | ||
| 303 | return "Generic[31].W"; | ||
| 304 | case Attribute::ColorFrontDiffuseR: | ||
| 305 | return "ColorFrontDiffuse.R"; | ||
| 306 | case Attribute::ColorFrontDiffuseG: | ||
| 307 | return "ColorFrontDiffuse.G"; | ||
| 308 | case Attribute::ColorFrontDiffuseB: | ||
| 309 | return "ColorFrontDiffuse.B"; | ||
| 310 | case Attribute::ColorFrontDiffuseA: | ||
| 311 | return "ColorFrontDiffuse.A"; | ||
| 312 | case Attribute::ColorFrontSpecularR: | ||
| 313 | return "ColorFrontSpecular.R"; | ||
| 314 | case Attribute::ColorFrontSpecularG: | ||
| 315 | return "ColorFrontSpecular.G"; | ||
| 316 | case Attribute::ColorFrontSpecularB: | ||
| 317 | return "ColorFrontSpecular.B"; | ||
| 318 | case Attribute::ColorFrontSpecularA: | ||
| 319 | return "ColorFrontSpecular.A"; | ||
| 320 | case Attribute::ColorBackDiffuseR: | ||
| 321 | return "ColorBackDiffuse.R"; | ||
| 322 | case Attribute::ColorBackDiffuseG: | ||
| 323 | return "ColorBackDiffuse.G"; | ||
| 324 | case Attribute::ColorBackDiffuseB: | ||
| 325 | return "ColorBackDiffuse.B"; | ||
| 326 | case Attribute::ColorBackDiffuseA: | ||
| 327 | return "ColorBackDiffuse.A"; | ||
| 328 | case Attribute::ColorBackSpecularR: | ||
| 329 | return "ColorBackSpecular.R"; | ||
| 330 | case Attribute::ColorBackSpecularG: | ||
| 331 | return "ColorBackSpecular.G"; | ||
| 332 | case Attribute::ColorBackSpecularB: | ||
| 333 | return "ColorBackSpecular.B"; | ||
| 334 | case Attribute::ColorBackSpecularA: | ||
| 335 | return "ColorBackSpecular.A"; | ||
| 336 | case Attribute::ClipDistance0: | ||
| 337 | return "ClipDistance[0]"; | ||
| 338 | case Attribute::ClipDistance1: | ||
| 339 | return "ClipDistance[1]"; | ||
| 340 | case Attribute::ClipDistance2: | ||
| 341 | return "ClipDistance[2]"; | ||
| 342 | case Attribute::ClipDistance3: | ||
| 343 | return "ClipDistance[3]"; | ||
| 344 | case Attribute::ClipDistance4: | ||
| 345 | return "ClipDistance[4]"; | ||
| 346 | case Attribute::ClipDistance5: | ||
| 347 | return "ClipDistance[5]"; | ||
| 348 | case Attribute::ClipDistance6: | ||
| 349 | return "ClipDistance[6]"; | ||
| 350 | case Attribute::ClipDistance7: | ||
| 351 | return "ClipDistance[7]"; | ||
| 352 | case Attribute::PointSpriteS: | ||
| 353 | return "PointSprite.S"; | ||
| 354 | case Attribute::PointSpriteT: | ||
| 355 | return "PointSprite.T"; | ||
| 356 | case Attribute::FogCoordinate: | ||
| 357 | return "FogCoordinate"; | ||
| 358 | case Attribute::TessellationEvaluationPointU: | ||
| 359 | return "TessellationEvaluationPoint.U"; | ||
| 360 | case Attribute::TessellationEvaluationPointV: | ||
| 361 | return "TessellationEvaluationPoint.V"; | ||
| 362 | case Attribute::InstanceId: | ||
| 363 | return "InstanceId"; | ||
| 364 | case Attribute::VertexId: | ||
| 365 | return "VertexId"; | ||
| 366 | case Attribute::FixedFncTexture0S: | ||
| 367 | return "FixedFncTexture[0].S"; | ||
| 368 | case Attribute::FixedFncTexture0T: | ||
| 369 | return "FixedFncTexture[0].T"; | ||
| 370 | case Attribute::FixedFncTexture0R: | ||
| 371 | return "FixedFncTexture[0].R"; | ||
| 372 | case Attribute::FixedFncTexture0Q: | ||
| 373 | return "FixedFncTexture[0].Q"; | ||
| 374 | case Attribute::FixedFncTexture1S: | ||
| 375 | return "FixedFncTexture[1].S"; | ||
| 376 | case Attribute::FixedFncTexture1T: | ||
| 377 | return "FixedFncTexture[1].T"; | ||
| 378 | case Attribute::FixedFncTexture1R: | ||
| 379 | return "FixedFncTexture[1].R"; | ||
| 380 | case Attribute::FixedFncTexture1Q: | ||
| 381 | return "FixedFncTexture[1].Q"; | ||
| 382 | case Attribute::FixedFncTexture2S: | ||
| 383 | return "FixedFncTexture[2].S"; | ||
| 384 | case Attribute::FixedFncTexture2T: | ||
| 385 | return "FixedFncTexture[2].T"; | ||
| 386 | case Attribute::FixedFncTexture2R: | ||
| 387 | return "FixedFncTexture[2].R"; | ||
| 388 | case Attribute::FixedFncTexture2Q: | ||
| 389 | return "FixedFncTexture[2].Q"; | ||
| 390 | case Attribute::FixedFncTexture3S: | ||
| 391 | return "FixedFncTexture[3].S"; | ||
| 392 | case Attribute::FixedFncTexture3T: | ||
| 393 | return "FixedFncTexture[3].T"; | ||
| 394 | case Attribute::FixedFncTexture3R: | ||
| 395 | return "FixedFncTexture[3].R"; | ||
| 396 | case Attribute::FixedFncTexture3Q: | ||
| 397 | return "FixedFncTexture[3].Q"; | ||
| 398 | case Attribute::FixedFncTexture4S: | ||
| 399 | return "FixedFncTexture[4].S"; | ||
| 400 | case Attribute::FixedFncTexture4T: | ||
| 401 | return "FixedFncTexture[4].T"; | ||
| 402 | case Attribute::FixedFncTexture4R: | ||
| 403 | return "FixedFncTexture[4].R"; | ||
| 404 | case Attribute::FixedFncTexture4Q: | ||
| 405 | return "FixedFncTexture[4].Q"; | ||
| 406 | case Attribute::FixedFncTexture5S: | ||
| 407 | return "FixedFncTexture[5].S"; | ||
| 408 | case Attribute::FixedFncTexture5T: | ||
| 409 | return "FixedFncTexture[5].T"; | ||
| 410 | case Attribute::FixedFncTexture5R: | ||
| 411 | return "FixedFncTexture[5].R"; | ||
| 412 | case Attribute::FixedFncTexture5Q: | ||
| 413 | return "FixedFncTexture[5].Q"; | ||
| 414 | case Attribute::FixedFncTexture6S: | ||
| 415 | return "FixedFncTexture[6].S"; | ||
| 416 | case Attribute::FixedFncTexture6T: | ||
| 417 | return "FixedFncTexture[6].T"; | ||
| 418 | case Attribute::FixedFncTexture6R: | ||
| 419 | return "FixedFncTexture[6].R"; | ||
| 420 | case Attribute::FixedFncTexture6Q: | ||
| 421 | return "FixedFncTexture[6].Q"; | ||
| 422 | case Attribute::FixedFncTexture7S: | ||
| 423 | return "FixedFncTexture[7].S"; | ||
| 424 | case Attribute::FixedFncTexture7T: | ||
| 425 | return "FixedFncTexture[7].T"; | ||
| 426 | case Attribute::FixedFncTexture7R: | ||
| 427 | return "FixedFncTexture[7].R"; | ||
| 428 | case Attribute::FixedFncTexture7Q: | ||
| 429 | return "FixedFncTexture[7].Q"; | ||
| 430 | case Attribute::FixedFncTexture8S: | ||
| 431 | return "FixedFncTexture[8].S"; | ||
| 432 | case Attribute::FixedFncTexture8T: | ||
| 433 | return "FixedFncTexture[8].T"; | ||
| 434 | case Attribute::FixedFncTexture8R: | ||
| 435 | return "FixedFncTexture[8].R"; | ||
| 436 | case Attribute::FixedFncTexture8Q: | ||
| 437 | return "FixedFncTexture[8].Q"; | ||
| 438 | case Attribute::FixedFncTexture9S: | ||
| 439 | return "FixedFncTexture[9].S"; | ||
| 440 | case Attribute::FixedFncTexture9T: | ||
| 441 | return "FixedFncTexture[9].T"; | ||
| 442 | case Attribute::FixedFncTexture9R: | ||
| 443 | return "FixedFncTexture[9].R"; | ||
| 444 | case Attribute::FixedFncTexture9Q: | ||
| 445 | return "FixedFncTexture[9].Q"; | ||
| 446 | case Attribute::ViewportMask: | ||
| 447 | return "ViewportMask"; | ||
| 448 | case Attribute::FrontFace: | ||
| 449 | return "FrontFace"; | ||
| 450 | } | ||
| 451 | return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | ||
| 452 | } | ||
| 453 | |||
| 454 | } // namespace Shader::IR | ||
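A quick check of the index arithmetic above, as a standalone sketch (enum values taken from attribute.h below, u32 from common/common_types.h): Generic0X is 32 and each generic attribute spans four consecutive values, so division and modulo by 4 recover the array slot and component.

    using Shader::IR::Attribute;
    // Generic1Y = 37: index = (37 - 32) / 4 = 1, element = 37 % 4 = 1 -> Generic[1].Y
    static_assert(static_cast<u32>(Attribute::Generic1Y) == 37);
    // A plain % 4 suffices for the element because Generic0X (32) is itself a
    // multiple of 4, keeping the whole generic range 4-aligned.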
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h new file mode 100644 index 000000000..ca1199494 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/attribute.h | |||
| @@ -0,0 +1,250 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | enum class Attribute : u64 { | ||
| 14 | PrimitiveId = 24, | ||
| 15 | Layer = 25, | ||
| 16 | ViewportIndex = 26, | ||
| 17 | PointSize = 27, | ||
| 18 | PositionX = 28, | ||
| 19 | PositionY = 29, | ||
| 20 | PositionZ = 30, | ||
| 21 | PositionW = 31, | ||
| 22 | Generic0X = 32, | ||
| 23 | Generic0Y = 33, | ||
| 24 | Generic0Z = 34, | ||
| 25 | Generic0W = 35, | ||
| 26 | Generic1X = 36, | ||
| 27 | Generic1Y = 37, | ||
| 28 | Generic1Z = 38, | ||
| 29 | Generic1W = 39, | ||
| 30 | Generic2X = 40, | ||
| 31 | Generic2Y = 41, | ||
| 32 | Generic2Z = 42, | ||
| 33 | Generic2W = 43, | ||
| 34 | Generic3X = 44, | ||
| 35 | Generic3Y = 45, | ||
| 36 | Generic3Z = 46, | ||
| 37 | Generic3W = 47, | ||
| 38 | Generic4X = 48, | ||
| 39 | Generic4Y = 49, | ||
| 40 | Generic4Z = 50, | ||
| 41 | Generic4W = 51, | ||
| 42 | Generic5X = 52, | ||
| 43 | Generic5Y = 53, | ||
| 44 | Generic5Z = 54, | ||
| 45 | Generic5W = 55, | ||
| 46 | Generic6X = 56, | ||
| 47 | Generic6Y = 57, | ||
| 48 | Generic6Z = 58, | ||
| 49 | Generic6W = 59, | ||
| 50 | Generic7X = 60, | ||
| 51 | Generic7Y = 61, | ||
| 52 | Generic7Z = 62, | ||
| 53 | Generic7W = 63, | ||
| 54 | Generic8X = 64, | ||
| 55 | Generic8Y = 65, | ||
| 56 | Generic8Z = 66, | ||
| 57 | Generic8W = 67, | ||
| 58 | Generic9X = 68, | ||
| 59 | Generic9Y = 69, | ||
| 60 | Generic9Z = 70, | ||
| 61 | Generic9W = 71, | ||
| 62 | Generic10X = 72, | ||
| 63 | Generic10Y = 73, | ||
| 64 | Generic10Z = 74, | ||
| 65 | Generic10W = 75, | ||
| 66 | Generic11X = 76, | ||
| 67 | Generic11Y = 77, | ||
| 68 | Generic11Z = 78, | ||
| 69 | Generic11W = 79, | ||
| 70 | Generic12X = 80, | ||
| 71 | Generic12Y = 81, | ||
| 72 | Generic12Z = 82, | ||
| 73 | Generic12W = 83, | ||
| 74 | Generic13X = 84, | ||
| 75 | Generic13Y = 85, | ||
| 76 | Generic13Z = 86, | ||
| 77 | Generic13W = 87, | ||
| 78 | Generic14X = 88, | ||
| 79 | Generic14Y = 89, | ||
| 80 | Generic14Z = 90, | ||
| 81 | Generic14W = 91, | ||
| 82 | Generic15X = 92, | ||
| 83 | Generic15Y = 93, | ||
| 84 | Generic15Z = 94, | ||
| 85 | Generic15W = 95, | ||
| 86 | Generic16X = 96, | ||
| 87 | Generic16Y = 97, | ||
| 88 | Generic16Z = 98, | ||
| 89 | Generic16W = 99, | ||
| 90 | Generic17X = 100, | ||
| 91 | Generic17Y = 101, | ||
| 92 | Generic17Z = 102, | ||
| 93 | Generic17W = 103, | ||
| 94 | Generic18X = 104, | ||
| 95 | Generic18Y = 105, | ||
| 96 | Generic18Z = 106, | ||
| 97 | Generic18W = 107, | ||
| 98 | Generic19X = 108, | ||
| 99 | Generic19Y = 109, | ||
| 100 | Generic19Z = 110, | ||
| 101 | Generic19W = 111, | ||
| 102 | Generic20X = 112, | ||
| 103 | Generic20Y = 113, | ||
| 104 | Generic20Z = 114, | ||
| 105 | Generic20W = 115, | ||
| 106 | Generic21X = 116, | ||
| 107 | Generic21Y = 117, | ||
| 108 | Generic21Z = 118, | ||
| 109 | Generic21W = 119, | ||
| 110 | Generic22X = 120, | ||
| 111 | Generic22Y = 121, | ||
| 112 | Generic22Z = 122, | ||
| 113 | Generic22W = 123, | ||
| 114 | Generic23X = 124, | ||
| 115 | Generic23Y = 125, | ||
| 116 | Generic23Z = 126, | ||
| 117 | Generic23W = 127, | ||
| 118 | Generic24X = 128, | ||
| 119 | Generic24Y = 129, | ||
| 120 | Generic24Z = 130, | ||
| 121 | Generic24W = 131, | ||
| 122 | Generic25X = 132, | ||
| 123 | Generic25Y = 133, | ||
| 124 | Generic25Z = 134, | ||
| 125 | Generic25W = 135, | ||
| 126 | Generic26X = 136, | ||
| 127 | Generic26Y = 137, | ||
| 128 | Generic26Z = 138, | ||
| 129 | Generic26W = 139, | ||
| 130 | Generic27X = 140, | ||
| 131 | Generic27Y = 141, | ||
| 132 | Generic27Z = 142, | ||
| 133 | Generic27W = 143, | ||
| 134 | Generic28X = 144, | ||
| 135 | Generic28Y = 145, | ||
| 136 | Generic28Z = 146, | ||
| 137 | Generic28W = 147, | ||
| 138 | Generic29X = 148, | ||
| 139 | Generic29Y = 149, | ||
| 140 | Generic29Z = 150, | ||
| 141 | Generic29W = 151, | ||
| 142 | Generic30X = 152, | ||
| 143 | Generic30Y = 153, | ||
| 144 | Generic30Z = 154, | ||
| 145 | Generic30W = 155, | ||
| 146 | Generic31X = 156, | ||
| 147 | Generic31Y = 157, | ||
| 148 | Generic31Z = 158, | ||
| 149 | Generic31W = 159, | ||
| 150 | ColorFrontDiffuseR = 160, | ||
| 151 | ColorFrontDiffuseG = 161, | ||
| 152 | ColorFrontDiffuseB = 162, | ||
| 153 | ColorFrontDiffuseA = 163, | ||
| 154 | ColorFrontSpecularR = 164, | ||
| 155 | ColorFrontSpecularG = 165, | ||
| 156 | ColorFrontSpecularB = 166, | ||
| 157 | ColorFrontSpecularA = 167, | ||
| 158 | ColorBackDiffuseR = 168, | ||
| 159 | ColorBackDiffuseG = 169, | ||
| 160 | ColorBackDiffuseB = 170, | ||
| 161 | ColorBackDiffuseA = 171, | ||
| 162 | ColorBackSpecularR = 172, | ||
| 163 | ColorBackSpecularG = 173, | ||
| 164 | ColorBackSpecularB = 174, | ||
| 165 | ColorBackSpecularA = 175, | ||
| 166 | ClipDistance0 = 176, | ||
| 167 | ClipDistance1 = 177, | ||
| 168 | ClipDistance2 = 178, | ||
| 169 | ClipDistance3 = 179, | ||
| 170 | ClipDistance4 = 180, | ||
| 171 | ClipDistance5 = 181, | ||
| 172 | ClipDistance6 = 182, | ||
| 173 | ClipDistance7 = 183, | ||
| 174 | PointSpriteS = 184, | ||
| 175 | PointSpriteT = 185, | ||
| 176 | FogCoordinate = 186, | ||
| 177 | TessellationEvaluationPointU = 188, | ||
| 178 | TessellationEvaluationPointV = 189, | ||
| 179 | InstanceId = 190, | ||
| 180 | VertexId = 191, | ||
| 181 | FixedFncTexture0S = 192, | ||
| 182 | FixedFncTexture0T = 193, | ||
| 183 | FixedFncTexture0R = 194, | ||
| 184 | FixedFncTexture0Q = 195, | ||
| 185 | FixedFncTexture1S = 196, | ||
| 186 | FixedFncTexture1T = 197, | ||
| 187 | FixedFncTexture1R = 198, | ||
| 188 | FixedFncTexture1Q = 199, | ||
| 189 | FixedFncTexture2S = 200, | ||
| 190 | FixedFncTexture2T = 201, | ||
| 191 | FixedFncTexture2R = 202, | ||
| 192 | FixedFncTexture2Q = 203, | ||
| 193 | FixedFncTexture3S = 204, | ||
| 194 | FixedFncTexture3T = 205, | ||
| 195 | FixedFncTexture3R = 206, | ||
| 196 | FixedFncTexture3Q = 207, | ||
| 197 | FixedFncTexture4S = 208, | ||
| 198 | FixedFncTexture4T = 209, | ||
| 199 | FixedFncTexture4R = 210, | ||
| 200 | FixedFncTexture4Q = 211, | ||
| 201 | FixedFncTexture5S = 212, | ||
| 202 | FixedFncTexture5T = 213, | ||
| 203 | FixedFncTexture5R = 214, | ||
| 204 | FixedFncTexture5Q = 215, | ||
| 205 | FixedFncTexture6S = 216, | ||
| 206 | FixedFncTexture6T = 217, | ||
| 207 | FixedFncTexture6R = 218, | ||
| 208 | FixedFncTexture6Q = 219, | ||
| 209 | FixedFncTexture7S = 220, | ||
| 210 | FixedFncTexture7T = 221, | ||
| 211 | FixedFncTexture7R = 222, | ||
| 212 | FixedFncTexture7Q = 223, | ||
| 213 | FixedFncTexture8S = 224, | ||
| 214 | FixedFncTexture8T = 225, | ||
| 215 | FixedFncTexture8R = 226, | ||
| 216 | FixedFncTexture8Q = 227, | ||
| 217 | FixedFncTexture9S = 228, | ||
| 218 | FixedFncTexture9T = 229, | ||
| 219 | FixedFncTexture9R = 230, | ||
| 220 | FixedFncTexture9Q = 231, | ||
| 221 | ViewportMask = 232, | ||
| 222 | FrontFace = 255, | ||
| 223 | }; | ||
| 224 | |||
| 225 | constexpr size_t NUM_GENERICS = 32; | ||
| 226 | |||
| 227 | [[nodiscard]] bool IsGeneric(Attribute attribute) noexcept; | ||
| 228 | |||
| 229 | [[nodiscard]] u32 GenericAttributeIndex(Attribute attribute); | ||
| 230 | |||
| 231 | [[nodiscard]] u32 GenericAttributeElement(Attribute attribute); | ||
| 232 | |||
| 233 | [[nodiscard]] std::string NameOf(Attribute attribute); | ||
| 234 | |||
| 235 | [[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept { | ||
| 236 | return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value); | ||
| 237 | } | ||
| 238 | |||
| 239 | } // namespace Shader::IR | ||
| 240 | |||
| 241 | template <> | ||
| 242 | struct fmt::formatter<Shader::IR::Attribute> { | ||
| 243 | constexpr auto parse(format_parse_context& ctx) { | ||
| 244 | return ctx.begin(); | ||
| 245 | } | ||
| 246 | template <typename FormatContext> | ||
| 247 | auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) { | ||
| 248 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute)); | ||
| 249 | } | ||
| 250 | }; | ||
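Since the attribute space is flat, the operator+ above steps across components and generics; together with the fmt formatter this keeps debug output terse. A small usage sketch:

    using Shader::IR::Attribute;
    const Attribute attr{Attribute::Generic0X + 4}; // four components later: Generic1X
    fmt::print("{}\n", attr);                       // prints "Generic[1].X" via NameOf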
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp new file mode 100644 index 000000000..7c08b25ce --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <initializer_list> | ||
| 7 | #include <map> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "common/bit_cast.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {} | ||
| 18 | |||
| 19 | Block::~Block() = default; | ||
| 20 | |||
| 21 | void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { | ||
| 22 | PrependNewInst(end(), op, args); | ||
| 23 | } | ||
| 24 | |||
| 25 | Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, | ||
| 26 | std::initializer_list<Value> args, u32 flags) { | ||
| 27 | Inst* const inst{inst_pool->Create(op, flags)}; | ||
| 28 | const auto result_it{instructions.insert(insertion_point, *inst)}; | ||
| 29 | |||
| 30 | if (inst->NumArgs() != args.size()) { | ||
| 31 | throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op); | ||
| 32 | } | ||
| 33 | std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable { | ||
| 34 | inst->SetArg(index, arg); | ||
| 35 | ++index; | ||
| 36 | }); | ||
| 37 | return result_it; | ||
| 38 | } | ||
| 39 | |||
| 40 | void Block::AddBranch(Block* block) { | ||
| 41 | if (std::ranges::find(imm_successors, block) != imm_successors.end()) { | ||
| 42 | throw LogicError("Successor already inserted"); | ||
| 43 | } | ||
| 44 | if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) { | ||
| 45 | throw LogicError("Predecessor already inserted"); | ||
| 46 | } | ||
| 47 | imm_successors.push_back(block); | ||
| 48 | block->imm_predecessors.push_back(this); | ||
| 49 | } | ||
| 50 | |||
| 51 | static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index, | ||
| 52 | Block* block) { | ||
| 53 | if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) { | ||
| 54 | return fmt::format("{{Block ${}}}", it->second); | ||
| 55 | } | ||
| 56 | return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block)); | ||
| 57 | } | ||
| 58 | |||
| 59 | static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, | ||
| 60 | const Inst* inst) { | ||
| 61 | const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)}; | ||
| 62 | if (is_inserted) { | ||
| 63 | ++inst_index; | ||
| 64 | } | ||
| 65 | return it->second; | ||
| 66 | } | ||
| 67 | |||
| 68 | static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index, | ||
| 69 | const Value& arg) { | ||
| 70 | if (arg.IsEmpty()) { | ||
| 71 | return "<null>"; | ||
| 72 | } | ||
| 73 | if (!arg.IsImmediate() || arg.IsIdentity()) { | ||
| 74 | return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst())); | ||
| 75 | } | ||
| 76 | switch (arg.Type()) { | ||
| 77 | case Type::U1: | ||
| 78 | return fmt::format("#{}", arg.U1() ? "true" : "false"); | ||
| 79 | case Type::U8: | ||
| 80 | return fmt::format("#{}", arg.U8()); | ||
| 81 | case Type::U16: | ||
| 82 | return fmt::format("#{}", arg.U16()); | ||
| 83 | case Type::U32: | ||
| 84 | return fmt::format("#{}", arg.U32()); | ||
| 85 | case Type::U64: | ||
| 86 | return fmt::format("#{}", arg.U64()); | ||
| 87 | case Type::F32: | ||
| 88 | return fmt::format("#{}", arg.F32()); | ||
| 89 | case Type::Reg: | ||
| 90 | return fmt::format("{}", arg.Reg()); | ||
| 91 | case Type::Pred: | ||
| 92 | return fmt::format("{}", arg.Pred()); | ||
| 93 | case Type::Attribute: | ||
| 94 | return fmt::format("{}", arg.Attribute()); | ||
| 95 | default: | ||
| 96 | return "<unknown immediate type>"; | ||
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | std::string DumpBlock(const Block& block) { | ||
| 101 | size_t inst_index{0}; | ||
| 102 | std::map<const Inst*, size_t> inst_to_index; | ||
| 103 | return DumpBlock(block, {}, inst_to_index, inst_index); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index, | ||
| 107 | std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) { | ||
| 108 | std::string ret{"Block"}; | ||
| 109 | if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) { | ||
| 110 | ret += fmt::format(" ${}", it->second); | ||
| 111 | } | ||
| 112 | ret += '\n'; | ||
| 113 | for (const Inst& inst : block) { | ||
| 114 | const Opcode op{inst.GetOpcode()}; | ||
| 115 | ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst)); | ||
| 116 | if (TypeOf(op) != Type::Void) { | ||
| 117 | ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op); | ||
| 118 | } else { | ||
| 119 | ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces | ||
| 120 | } | ||
| 121 | const size_t arg_count{inst.NumArgs()}; | ||
| 122 | for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) { | ||
| 123 | const Value arg{inst.Arg(arg_index)}; | ||
| 124 | const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)}; | ||
| 125 | ret += arg_index != 0 ? ", " : " "; | ||
| 126 | if (op == Opcode::Phi) { | ||
| 127 | ret += fmt::format("[ {}, {} ]", arg_str, | ||
| 128 | BlockToIndex(block_to_index, inst.PhiBlock(arg_index))); | ||
| 129 | } else { | ||
| 130 | ret += arg_str; | ||
| 131 | } | ||
| 132 | if (op != Opcode::Phi) { | ||
| 133 | const Type actual_type{arg.Type()}; | ||
| 134 | const Type expected_type{ArgTypeOf(op, arg_index)}; | ||
| 135 | if (!AreTypesCompatible(actual_type, expected_type)) { | ||
| 136 | ret += fmt::format("<type error: {} != {}>", actual_type, expected_type); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | } | ||
| 140 | if (TypeOf(op) != Type::Void) { | ||
| 141 | ret += fmt::format(" (uses: {})\n", inst.UseCount()); | ||
| 142 | } else { | ||
| 143 | ret += '\n'; | ||
| 144 | } | ||
| 145 | } | ||
| 146 | return ret; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace Shader::IR | ||
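A minimal sketch of building and dumping a block, assuming ObjectPool from object_pool.h is default-constructible and using opcodes that appear elsewhere in this patch:

    Shader::ObjectPool<Shader::IR::Inst> inst_pool;
    Shader::IR::Block block{inst_pool};
    block.AppendNewInst(Shader::IR::Opcode::Prologue, {});
    block.AppendNewInst(Shader::IR::Opcode::Epilogue, {});
    fmt::print("{}", Shader::IR::DumpBlock(block)); // one "[address] opcode" line per instruction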
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h new file mode 100644 index 000000000..7e134b4c7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/basic_block.h | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <initializer_list> | ||
| 8 | #include <map> | ||
| 9 | #include <span> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/list.hpp> | ||
| 13 | |||
| 14 | #include "common/bit_cast.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 18 | #include "shader_recompiler/object_pool.h" | ||
| 19 | |||
| 20 | namespace Shader::IR { | ||
| 21 | |||
| 22 | class Block { | ||
| 23 | public: | ||
| 24 | using InstructionList = boost::intrusive::list<Inst>; | ||
| 25 | using size_type = InstructionList::size_type; | ||
| 26 | using iterator = InstructionList::iterator; | ||
| 27 | using const_iterator = InstructionList::const_iterator; | ||
| 28 | using reverse_iterator = InstructionList::reverse_iterator; | ||
| 29 | using const_reverse_iterator = InstructionList::const_reverse_iterator; | ||
| 30 | |||
| 31 | explicit Block(ObjectPool<Inst>& inst_pool_); | ||
| 32 | ~Block(); | ||
| 33 | |||
| 34 | Block(const Block&) = delete; | ||
| 35 | Block& operator=(const Block&) = delete; | ||
| 36 | |||
| 37 | Block(Block&&) = default; | ||
| 38 | Block& operator=(Block&&) = default; | ||
| 39 | |||
| 40 | /// Appends a new instruction to the end of this basic block. | ||
| 41 | void AppendNewInst(Opcode op, std::initializer_list<Value> args); | ||
| 42 | |||
| 43 | /// Prepends a new instruction to this basic block before the insertion point. | ||
| 44 | iterator PrependNewInst(iterator insertion_point, Opcode op, | ||
| 45 | std::initializer_list<Value> args = {}, u32 flags = 0); | ||
| 46 | |||
| 47 | /// Adds a new branch to this basic block. | ||
| 48 | void AddBranch(Block* block); | ||
| 49 | |||
| 50 | /// Gets a mutable reference to the instruction list for this basic block. | ||
| 51 | [[nodiscard]] InstructionList& Instructions() noexcept { | ||
| 52 | return instructions; | ||
| 53 | } | ||
| 54 | /// Gets an immutable reference to the instruction list for this basic block. | ||
| 55 | [[nodiscard]] const InstructionList& Instructions() const noexcept { | ||
| 56 | return instructions; | ||
| 57 | } | ||
| 58 | |||
| 59 | /// Gets an immutable span of the immediate predecessors. | ||
| 60 | [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept { | ||
| 61 | return imm_predecessors; | ||
| 62 | } | ||
| 63 | /// Gets an immutable span of the immediate successors. | ||
| 64 | [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept { | ||
| 65 | return imm_successors; | ||
| 66 | } | ||
| 67 | |||
| 68 | /// Intrusively store the host definition of this basic block. | ||
| 69 | template <typename DefinitionType> | ||
| 70 | void SetDefinition(DefinitionType def) { | ||
| 71 | definition = Common::BitCast<u32>(def); | ||
| 72 | } | ||
| 73 | |||
| 74 | /// Return the intrusively stored host definition of this basic block. | ||
| 75 | template <typename DefinitionType> | ||
| 76 | [[nodiscard]] DefinitionType Definition() const noexcept { | ||
| 77 | return Common::BitCast<DefinitionType>(definition); | ||
| 78 | } | ||
| 79 | |||
| 80 | void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept { | ||
| 81 | ssa_reg_values[RegIndex(reg)] = value; | ||
| 82 | } | ||
| 83 | const Value& SsaRegValue(IR::Reg reg) const noexcept { | ||
| 84 | return ssa_reg_values[RegIndex(reg)]; | ||
| 85 | } | ||
| 86 | |||
| 87 | void SsaSeal() noexcept { | ||
| 88 | is_ssa_sealed = true; | ||
| 89 | } | ||
| 90 | [[nodiscard]] bool IsSsaSealed() const noexcept { | ||
| 91 | return is_ssa_sealed; | ||
| 92 | } | ||
| 93 | |||
| 94 | [[nodiscard]] bool empty() const { | ||
| 95 | return instructions.empty(); | ||
| 96 | } | ||
| 97 | [[nodiscard]] size_type size() const { | ||
| 98 | return instructions.size(); | ||
| 99 | } | ||
| 100 | |||
| 101 | [[nodiscard]] Inst& front() { | ||
| 102 | return instructions.front(); | ||
| 103 | } | ||
| 104 | [[nodiscard]] const Inst& front() const { | ||
| 105 | return instructions.front(); | ||
| 106 | } | ||
| 107 | |||
| 108 | [[nodiscard]] Inst& back() { | ||
| 109 | return instructions.back(); | ||
| 110 | } | ||
| 111 | [[nodiscard]] const Inst& back() const { | ||
| 112 | return instructions.back(); | ||
| 113 | } | ||
| 114 | |||
| 115 | [[nodiscard]] iterator begin() { | ||
| 116 | return instructions.begin(); | ||
| 117 | } | ||
| 118 | [[nodiscard]] const_iterator begin() const { | ||
| 119 | return instructions.begin(); | ||
| 120 | } | ||
| 121 | [[nodiscard]] iterator end() { | ||
| 122 | return instructions.end(); | ||
| 123 | } | ||
| 124 | [[nodiscard]] const_iterator end() const { | ||
| 125 | return instructions.end(); | ||
| 126 | } | ||
| 127 | |||
| 128 | [[nodiscard]] reverse_iterator rbegin() { | ||
| 129 | return instructions.rbegin(); | ||
| 130 | } | ||
| 131 | [[nodiscard]] const_reverse_iterator rbegin() const { | ||
| 132 | return instructions.rbegin(); | ||
| 133 | } | ||
| 134 | [[nodiscard]] reverse_iterator rend() { | ||
| 135 | return instructions.rend(); | ||
| 136 | } | ||
| 137 | [[nodiscard]] const_reverse_iterator rend() const { | ||
| 138 | return instructions.rend(); | ||
| 139 | } | ||
| 140 | |||
| 141 | [[nodiscard]] const_iterator cbegin() const { | ||
| 142 | return instructions.cbegin(); | ||
| 143 | } | ||
| 144 | [[nodiscard]] const_iterator cend() const { | ||
| 145 | return instructions.cend(); | ||
| 146 | } | ||
| 147 | |||
| 148 | [[nodiscard]] const_reverse_iterator crbegin() const { | ||
| 149 | return instructions.crbegin(); | ||
| 150 | } | ||
| 151 | [[nodiscard]] const_reverse_iterator crend() const { | ||
| 152 | return instructions.crend(); | ||
| 153 | } | ||
| 154 | |||
| 155 | private: | ||
| 156 | /// Memory pool for instruction list | ||
| 157 | ObjectPool<Inst>* inst_pool; | ||
| 158 | |||
| 159 | /// List of instructions in this block | ||
| 160 | InstructionList instructions; | ||
| 161 | |||
| 162 | /// Block immediate predecessors | ||
| 163 | std::vector<Block*> imm_predecessors; | ||
| 164 | /// Block immediate successors | ||
| 165 | std::vector<Block*> imm_successors; | ||
| 166 | |||
| 167 | /// Intrusively store the value of a register in the block. | ||
| 168 | std::array<Value, NUM_REGS> ssa_reg_values; | ||
| 169 | /// Intrusively store if the block is sealed in the SSA pass. | ||
| 170 | bool is_ssa_sealed{false}; | ||
| 171 | |||
| 172 | /// Intrusively stored host definition of this block. | ||
| 173 | u32 definition{}; | ||
| 174 | }; | ||
| 175 | |||
| 176 | using BlockList = std::vector<Block*>; | ||
| 177 | |||
| 178 | [[nodiscard]] std::string DumpBlock(const Block& block); | ||
| 179 | |||
| 180 | [[nodiscard]] std::string DumpBlock(const Block& block, | ||
| 181 | const std::map<const Block*, size_t>& block_to_index, | ||
| 182 | std::map<const Inst*, size_t>& inst_to_index, | ||
| 183 | size_t& inst_index); | ||
| 184 | |||
| 185 | } // namespace Shader::IR | ||
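AddBranch keeps the successor and predecessor lists symmetric, which the control-flow passes depend on. A short sketch, reusing an inst_pool as in the previous example:

    Shader::IR::Block entry{inst_pool};
    Shader::IR::Block merge{inst_pool};
    entry.AddBranch(&merge);
    // entry.ImmSuccessors() now holds &merge and merge.ImmPredecessors() holds &entry;
    // inserting the same edge twice throws LogicError (see basic_block.cpp above).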
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h new file mode 100644 index 000000000..a52ccbd58 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <queue> | ||
| 9 | #include <type_traits> | ||
| 10 | |||
| 11 | #include <boost/container/small_vector.hpp> | ||
| 12 | |||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | template <typename Pred> | ||
| 18 | auto BreadthFirstSearch(const Value& value, Pred&& pred) | ||
| 19 | -> std::invoke_result_t<Pred, const Inst*> { | ||
| 20 | if (value.IsImmediate()) { | ||
| 21 | // Nothing to do with immediates | ||
| 22 | return std::nullopt; | ||
| 23 | } | ||
| 24 | // Breadth-first search visiting the rightmost arguments first | ||
| 25 | // The small_vector size was tuned from shaders in Super Smash Bros. Ultimate | ||
| 26 | boost::container::small_vector<const Inst*, 2> visited; | ||
| 27 | std::queue<const Inst*> queue; | ||
| 28 | queue.push(value.InstRecursive()); | ||
| 29 | |||
| 30 | while (!queue.empty()) { | ||
| 31 | // Pop one instruction from the queue | ||
| 32 | const Inst* const inst{queue.front()}; | ||
| 33 | queue.pop(); | ||
| 34 | if (const std::optional result = pred(inst)) { | ||
| 35 | // This is the instruction we were looking for | ||
| 36 | return result; | ||
| 37 | } | ||
| 38 | // Visit the rightmost arguments first | ||
| 39 | for (size_t arg = inst->NumArgs(); arg--;) { | ||
| 40 | const Value arg_value{inst->Arg(arg)}; | ||
| 41 | if (arg_value.IsImmediate()) { | ||
| 42 | continue; | ||
| 43 | } | ||
| 44 | // Queue instruction if it hasn't been visited | ||
| 45 | const Inst* const arg_inst{arg_value.InstRecursive()}; | ||
| 46 | if (std::ranges::find(visited, arg_inst) == visited.end()) { | ||
| 47 | visited.push_back(arg_inst); | ||
| 48 | queue.push(arg_inst); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | } | ||
| 52 | // The whole SSA tree has been traversed without finding a result | ||
| 53 | return std::nullopt; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Shader::IR | ||
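The predicate returns a std::optional and the search stops at the first engaged result. For example, given some IR::Value value, this hedged sketch finds the constant-buffer read feeding it (Inst::GetOpcode appears in basic_block.cpp and Opcode::GetCbufU32 in ir_emitter.cpp below):

    const auto is_cbuf_read = [](const Shader::IR::Inst* inst)
        -> std::optional<const Shader::IR::Inst*> {
        if (inst->GetOpcode() == Shader::IR::Opcode::GetCbufU32) {
            return inst;
        }
        return std::nullopt;
    };
    if (const auto cbuf{Shader::IR::BreadthFirstSearch(value, is_cbuf_read)}) {
        // *cbuf is the closest GetCbufU32 reachable from value in breadth-first order.
    }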
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp new file mode 100644 index 000000000..fc18ea2a2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.cpp | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | std::string NameOf(Condition condition) { | ||
| 14 | std::string ret; | ||
| 15 | if (condition.GetFlowTest() != FlowTest::T) { | ||
| 16 | ret = fmt::to_string(condition.GetFlowTest()); | ||
| 17 | } | ||
| 18 | const auto [pred, negated]{condition.GetPred()}; | ||
| 19 | if (!ret.empty()) { | ||
| 20 | ret += '&'; | ||
| 21 | } | ||
| 22 | if (negated) { | ||
| 23 | ret += '!'; | ||
| 24 | } | ||
| 25 | ret += fmt::to_string(pred); | ||
| 26 | return ret; | ||
| 27 | } | ||
| 28 | |||
| 29 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h new file mode 100644 index 000000000..aa8597c60 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/condition.h | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <string> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | |||
| 18 | class Condition { | ||
| 19 | public: | ||
| 20 | Condition() noexcept = default; | ||
| 21 | |||
| 22 | explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept | ||
| 23 | : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)}, | ||
| 24 | pred_negated{pred_negated_ ? u8{1} : u8{0}} {} | ||
| 25 | |||
| 26 | explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept | ||
| 27 | : Condition(FlowTest::T, pred_, pred_negated_) {} | ||
| 28 | |||
| 29 | explicit Condition(bool value) : Condition(Pred::PT, !value) {} // PT is always true, so negate it to encode false | ||
| 30 | |||
| 31 | auto operator<=>(const Condition&) const noexcept = default; | ||
| 32 | |||
| 33 | [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept { | ||
| 34 | return static_cast<IR::FlowTest>(flow_test); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept { | ||
| 38 | return {static_cast<IR::Pred>(pred), pred_negated != 0}; | ||
| 39 | } | ||
| 40 | |||
| 41 | private: | ||
| 42 | u16 flow_test; | ||
| 43 | u8 pred; | ||
| 44 | u8 pred_negated; | ||
| 45 | }; | ||
| 46 | |||
| 47 | std::string NameOf(Condition condition); | ||
| 48 | |||
| 49 | } // namespace Shader::IR | ||
| 50 | |||
| 51 | template <> | ||
| 52 | struct fmt::formatter<Shader::IR::Condition> { | ||
| 53 | constexpr auto parse(format_parse_context& ctx) { | ||
| 54 | return ctx.begin(); | ||
| 55 | } | ||
| 56 | template <typename FormatContext> | ||
| 57 | auto format(const Shader::IR::Condition& cond, FormatContext& ctx) { | ||
| 58 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond)); | ||
| 59 | } | ||
| 60 | }; | ||
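A usage sketch tying Condition to the formatter above (Pred::P0 and its formatter are assumed from pred.h, which is not part of this excerpt):

    using Shader::IR::Condition;
    using Shader::IR::FlowTest;
    const Condition cond{FlowTest::NE, Shader::IR::Pred::P0, true};
    fmt::print("{}\n", cond); // "NE&!P0": flow test, '&', '!' for the negation, then the predicate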
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp new file mode 100644 index 000000000..6ebb4ad89 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.cpp | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | std::string NameOf(FlowTest flow_test) { | ||
| 14 | switch (flow_test) { | ||
| 15 | case FlowTest::F: | ||
| 16 | return "F"; | ||
| 17 | case FlowTest::LT: | ||
| 18 | return "LT"; | ||
| 19 | case FlowTest::EQ: | ||
| 20 | return "EQ"; | ||
| 21 | case FlowTest::LE: | ||
| 22 | return "LE"; | ||
| 23 | case FlowTest::GT: | ||
| 24 | return "GT"; | ||
| 25 | case FlowTest::NE: | ||
| 26 | return "NE"; | ||
| 27 | case FlowTest::GE: | ||
| 28 | return "GE"; | ||
| 29 | case FlowTest::NUM: | ||
| 30 | return "NUM"; | ||
| 31 | case FlowTest::NaN: | ||
| 32 | return "NAN"; | ||
| 33 | case FlowTest::LTU: | ||
| 34 | return "LTU"; | ||
| 35 | case FlowTest::EQU: | ||
| 36 | return "EQU"; | ||
| 37 | case FlowTest::LEU: | ||
| 38 | return "LEU"; | ||
| 39 | case FlowTest::GTU: | ||
| 40 | return "GTU"; | ||
| 41 | case FlowTest::NEU: | ||
| 42 | return "NEU"; | ||
| 43 | case FlowTest::GEU: | ||
| 44 | return "GEU"; | ||
| 45 | case FlowTest::T: | ||
| 46 | return "T"; | ||
| 47 | case FlowTest::OFF: | ||
| 48 | return "OFF"; | ||
| 49 | case FlowTest::LO: | ||
| 50 | return "LO"; | ||
| 51 | case FlowTest::SFF: | ||
| 52 | return "SFF"; | ||
| 53 | case FlowTest::LS: | ||
| 54 | return "LS"; | ||
| 55 | case FlowTest::HI: | ||
| 56 | return "HI"; | ||
| 57 | case FlowTest::SFT: | ||
| 58 | return "SFT"; | ||
| 59 | case FlowTest::HS: | ||
| 60 | return "HS"; | ||
| 61 | case FlowTest::OFT: | ||
| 62 | return "OFT"; | ||
| 63 | case FlowTest::CSM_TA: | ||
| 64 | return "CSM_TA"; | ||
| 65 | case FlowTest::CSM_TR: | ||
| 66 | return "CSM_TR"; | ||
| 67 | case FlowTest::CSM_MX: | ||
| 68 | return "CSM_MX"; | ||
| 69 | case FlowTest::FCSM_TA: | ||
| 70 | return "FCSM_TA"; | ||
| 71 | case FlowTest::FCSM_TR: | ||
| 72 | return "FCSM_TR"; | ||
| 73 | case FlowTest::FCSM_MX: | ||
| 74 | return "FCSM_MX"; | ||
| 75 | case FlowTest::RLE: | ||
| 76 | return "RLE"; | ||
| 77 | case FlowTest::RGT: | ||
| 78 | return "RGT"; | ||
| 79 | } | ||
| 80 | return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test)); | ||
| 81 | } | ||
| 82 | |||
| 83 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h new file mode 100644 index 000000000..09e113773 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/flow_test.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | |||
| 14 | enum class FlowTest : u64 { | ||
| 15 | F, | ||
| 16 | LT, | ||
| 17 | EQ, | ||
| 18 | LE, | ||
| 19 | GT, | ||
| 20 | NE, | ||
| 21 | GE, | ||
| 22 | NUM, | ||
| 23 | NaN, | ||
| 24 | LTU, | ||
| 25 | EQU, | ||
| 26 | LEU, | ||
| 27 | GTU, | ||
| 28 | NEU, | ||
| 29 | GEU, | ||
| 30 | T, | ||
| 31 | OFF, | ||
| 32 | LO, | ||
| 33 | SFF, | ||
| 34 | LS, | ||
| 35 | HI, | ||
| 36 | SFT, | ||
| 37 | HS, | ||
| 38 | OFT, | ||
| 39 | CSM_TA, | ||
| 40 | CSM_TR, | ||
| 41 | CSM_MX, | ||
| 42 | FCSM_TA, | ||
| 43 | FCSM_TR, | ||
| 44 | FCSM_MX, | ||
| 45 | RLE, | ||
| 46 | RGT, | ||
| 47 | }; | ||
| 48 | |||
| 49 | [[nodiscard]] std::string NameOf(FlowTest flow_test); | ||
| 50 | |||
| 51 | } // namespace Shader::IR | ||
| 52 | |||
| 53 | template <> | ||
| 54 | struct fmt::formatter<Shader::IR::FlowTest> { | ||
| 55 | constexpr auto parse(format_parse_context& ctx) { | ||
| 56 | return ctx.begin(); | ||
| 57 | } | ||
| 58 | template <typename FormatContext> | ||
| 59 | auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) { | ||
| 60 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test)); | ||
| 61 | } | ||
| 62 | }; | ||
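With this formatter, flow tests print by name, which the Condition and block dumps above rely on:

    fmt::print("{}\n", Shader::IR::FlowTest::GEU); // prints "GEU" via NameOf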
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp new file mode 100644 index 000000000..13159a68d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -0,0 +1,2017 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_cast.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | namespace { | ||
| 11 | [[noreturn]] void ThrowInvalidType(Type type) { | ||
| 12 | throw InvalidArgument("Invalid type {}", type); | ||
| 13 | } | ||
| 14 | |||
| 15 | Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) { | ||
| 16 | if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) { | ||
| 17 | return ir.CompositeConstruct(bias_lod, lod_clamp); | ||
| 18 | } else if (!bias_lod.IsEmpty()) { | ||
| 19 | return bias_lod; | ||
| 20 | } else if (!lod_clamp.IsEmpty()) { | ||
| 21 | return lod_clamp; | ||
| 22 | } else { | ||
| 23 | return Value{}; | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | U1 IREmitter::Imm1(bool value) const { | ||
| 29 | return U1{Value{value}}; | ||
| 30 | } | ||
| 31 | |||
| 32 | U8 IREmitter::Imm8(u8 value) const { | ||
| 33 | return U8{Value{value}}; | ||
| 34 | } | ||
| 35 | |||
| 36 | U16 IREmitter::Imm16(u16 value) const { | ||
| 37 | return U16{Value{value}}; | ||
| 38 | } | ||
| 39 | |||
| 40 | U32 IREmitter::Imm32(u32 value) const { | ||
| 41 | return U32{Value{value}}; | ||
| 42 | } | ||
| 43 | |||
| 44 | U32 IREmitter::Imm32(s32 value) const { | ||
| 45 | return U32{Value{static_cast<u32>(value)}}; | ||
| 46 | } | ||
| 47 | |||
| 48 | F32 IREmitter::Imm32(f32 value) const { | ||
| 49 | return F32{Value{value}}; | ||
| 50 | } | ||
| 51 | |||
| 52 | U64 IREmitter::Imm64(u64 value) const { | ||
| 53 | return U64{Value{value}}; | ||
| 54 | } | ||
| 55 | |||
| 56 | U64 IREmitter::Imm64(s64 value) const { | ||
| 57 | return U64{Value{static_cast<u64>(value)}}; | ||
| 58 | } | ||
| 59 | |||
| 60 | F64 IREmitter::Imm64(f64 value) const { | ||
| 61 | return F64{Value{value}}; | ||
| 62 | } | ||
| 63 | |||
| 64 | U1 IREmitter::ConditionRef(const U1& value) { | ||
| 65 | return Inst<U1>(Opcode::ConditionRef, value); | ||
| 66 | } | ||
| 67 | |||
| 68 | void IREmitter::Reference(const Value& value) { | ||
| 69 | Inst(Opcode::Reference, value); | ||
| 70 | } | ||
| 71 | |||
| 72 | void IREmitter::PhiMove(IR::Inst& phi, const Value& value) { | ||
| 73 | Inst(Opcode::PhiMove, Value{&phi}, value); | ||
| 74 | } | ||
| 75 | |||
| 76 | void IREmitter::Prologue() { | ||
| 77 | Inst(Opcode::Prologue); | ||
| 78 | } | ||
| 79 | |||
| 80 | void IREmitter::Epilogue() { | ||
| 81 | Inst(Opcode::Epilogue); | ||
| 82 | } | ||
| 83 | |||
| 84 | void IREmitter::DemoteToHelperInvocation() { | ||
| 85 | Inst(Opcode::DemoteToHelperInvocation); | ||
| 86 | } | ||
| 87 | |||
| 88 | void IREmitter::EmitVertex(const U32& stream) { | ||
| 89 | Inst(Opcode::EmitVertex, stream); | ||
| 90 | } | ||
| 91 | |||
| 92 | void IREmitter::EndPrimitive(const U32& stream) { | ||
| 93 | Inst(Opcode::EndPrimitive, stream); | ||
| 94 | } | ||
| 95 | |||
| 96 | void IREmitter::Barrier() { | ||
| 97 | Inst(Opcode::Barrier); | ||
| 98 | } | ||
| 99 | |||
| 100 | void IREmitter::WorkgroupMemoryBarrier() { | ||
| 101 | Inst(Opcode::WorkgroupMemoryBarrier); | ||
| 102 | } | ||
| 103 | |||
| 104 | void IREmitter::DeviceMemoryBarrier() { | ||
| 105 | Inst(Opcode::DeviceMemoryBarrier); | ||
| 106 | } | ||
| 107 | |||
| 108 | U32 IREmitter::GetReg(IR::Reg reg) { | ||
| 109 | return Inst<U32>(Opcode::GetRegister, reg); | ||
| 110 | } | ||
| 111 | |||
| 112 | void IREmitter::SetReg(IR::Reg reg, const U32& value) { | ||
| 113 | Inst(Opcode::SetRegister, reg, value); | ||
| 114 | } | ||
| 115 | |||
| 116 | U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) { | ||
| 117 | if (pred == Pred::PT) { | ||
| 118 | return Imm1(!is_negated); | ||
| 119 | } | ||
| 120 | const U1 value{Inst<U1>(Opcode::GetPred, pred)}; | ||
| 121 | if (is_negated) { | ||
| 122 | return Inst<U1>(Opcode::LogicalNot, value); | ||
| 123 | } else { | ||
| 124 | return value; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void IREmitter::SetPred(IR::Pred pred, const U1& value) { | ||
| 129 | if (pred != IR::Pred::PT) { | ||
| 130 | Inst(Opcode::SetPred, pred, value); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | U1 IREmitter::GetGotoVariable(u32 id) { | ||
| 135 | return Inst<U1>(Opcode::GetGotoVariable, id); | ||
| 136 | } | ||
| 137 | |||
| 138 | void IREmitter::SetGotoVariable(u32 id, const U1& value) { | ||
| 139 | Inst(Opcode::SetGotoVariable, id, value); | ||
| 140 | } | ||
| 141 | |||
| 142 | U32 IREmitter::GetIndirectBranchVariable() { | ||
| 143 | return Inst<U32>(Opcode::GetIndirectBranchVariable); | ||
| 144 | } | ||
| 145 | |||
| 146 | void IREmitter::SetIndirectBranchVariable(const U32& value) { | ||
| 147 | Inst(Opcode::SetIndirectBranchVariable, value); | ||
| 148 | } | ||
| 149 | |||
| 150 | U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) { | ||
| 151 | return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); | ||
| 152 | } | ||
| 153 | |||
| 154 | Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, | ||
| 155 | bool is_signed) { | ||
| 156 | switch (bitsize) { | ||
| 157 | case 8: | ||
| 158 | return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset); | ||
| 159 | case 16: | ||
| 160 | return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset); | ||
| 161 | case 32: | ||
| 162 | return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset); | ||
| 163 | case 64: | ||
| 164 | return Inst(Opcode::GetCbufU32x2, binding, byte_offset); | ||
| 165 | default: | ||
| 166 | throw InvalidArgument("Invalid bit size {}", bitsize); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) { | ||
| 171 | return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset); | ||
| 172 | } | ||
| 173 | |||
| 174 | U1 IREmitter::GetZFlag() { | ||
| 175 | return Inst<U1>(Opcode::GetZFlag); | ||
| 176 | } | ||
| 177 | |||
| 178 | U1 IREmitter::GetSFlag() { | ||
| 179 | return Inst<U1>(Opcode::GetSFlag); | ||
| 180 | } | ||
| 181 | |||
| 182 | U1 IREmitter::GetCFlag() { | ||
| 183 | return Inst<U1>(Opcode::GetCFlag); | ||
| 184 | } | ||
| 185 | |||
| 186 | U1 IREmitter::GetOFlag() { | ||
| 187 | return Inst<U1>(Opcode::GetOFlag); | ||
| 188 | } | ||
| 189 | |||
| 190 | void IREmitter::SetZFlag(const U1& value) { | ||
| 191 | Inst(Opcode::SetZFlag, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | void IREmitter::SetSFlag(const U1& value) { | ||
| 195 | Inst(Opcode::SetSFlag, value); | ||
| 196 | } | ||
| 197 | |||
| 198 | void IREmitter::SetCFlag(const U1& value) { | ||
| 199 | Inst(Opcode::SetCFlag, value); | ||
| 200 | } | ||
| 201 | |||
| 202 | void IREmitter::SetOFlag(const U1& value) { | ||
| 203 | Inst(Opcode::SetOFlag, value); | ||
| 204 | } | ||
| 205 | |||
| 206 | static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) { | ||
| 207 | switch (flow_test) { | ||
| 208 | case FlowTest::F: | ||
| 209 | return ir.Imm1(false); | ||
| 210 | case FlowTest::LT: | ||
| 211 | return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())), | ||
| 212 | ir.GetOFlag()); | ||
| 213 | case FlowTest::EQ: | ||
| 214 | return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()); | ||
| 215 | case FlowTest::LE: | ||
| 216 | return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); | ||
| 217 | case FlowTest::GT: | ||
| 218 | return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()), | ||
| 219 | ir.LogicalNot(ir.GetZFlag())); | ||
| 220 | case FlowTest::NE: | ||
| 221 | return ir.LogicalNot(ir.GetZFlag()); | ||
| 222 | case FlowTest::GE: | ||
| 223 | return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag())); | ||
| 224 | case FlowTest::NUM: | ||
| 225 | return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); | ||
| 226 | case FlowTest::NaN: | ||
| 227 | return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag()); | ||
| 228 | case FlowTest::LTU: | ||
| 229 | return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()); | ||
| 230 | case FlowTest::EQU: | ||
| 231 | return ir.GetZFlag(); | ||
| 232 | case FlowTest::LEU: | ||
| 233 | return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag()); | ||
| 234 | case FlowTest::GTU: | ||
| 235 | return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), | ||
| 236 | ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag())); | ||
| 237 | case FlowTest::NEU: | ||
| 238 | return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())); | ||
| 239 | case FlowTest::GEU: | ||
| 240 | return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()), | ||
| 241 | ir.GetOFlag()); | ||
| 242 | case FlowTest::T: | ||
| 243 | return ir.Imm1(true); | ||
| 244 | case FlowTest::OFF: | ||
| 245 | return ir.LogicalNot(ir.GetOFlag()); | ||
| 246 | case FlowTest::LO: | ||
| 247 | return ir.LogicalNot(ir.GetCFlag()); | ||
| 248 | case FlowTest::SFF: | ||
| 249 | return ir.LogicalNot(ir.GetSFlag()); | ||
| 250 | case FlowTest::LS: | ||
| 251 | return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag())); | ||
| 252 | case FlowTest::HI: | ||
| 253 | return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag())); | ||
| 254 | case FlowTest::SFT: | ||
| 255 | return ir.GetSFlag(); | ||
| 256 | case FlowTest::HS: | ||
| 257 | return ir.GetCFlag(); | ||
| 258 | case FlowTest::OFT: | ||
| 259 | return ir.GetOFlag(); | ||
| 260 | case FlowTest::RLE: | ||
| 261 | return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag()); | ||
| 262 | case FlowTest::RGT: | ||
| 263 | return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag())); | ||
| 264 | case FlowTest::FCSM_TR: | ||
| 265 | LOG_WARNING(Shader, "(STUBBED) FCSM_TR"); | ||
| 266 | return ir.Imm1(false); | ||
| 267 | case FlowTest::CSM_TA: | ||
| 268 | case FlowTest::CSM_TR: | ||
| 269 | case FlowTest::CSM_MX: | ||
| 270 | case FlowTest::FCSM_TA: | ||
| 271 | case FlowTest::FCSM_MX: | ||
| 272 | default: | ||
| 273 | throw NotImplementedException("Flow test {}", flow_test); | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | U1 IREmitter::Condition(IR::Condition cond) { | ||
| 278 | const FlowTest flow_test{cond.GetFlowTest()}; | ||
| 279 | const auto [pred, is_negated]{cond.GetPred()}; | ||
| 280 | if (flow_test == FlowTest::T) { | ||
| 281 | return GetPred(pred, is_negated); | ||
| 282 | } | ||
| 283 | return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test)); | ||
| 284 | } | ||
| 285 | |||
| 286 | U1 IREmitter::GetFlowTestResult(FlowTest test) { | ||
| 287 | return GetFlowTest(*this, test); | ||
| 288 | } | ||
| 289 | |||
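| | // Attribute and patch accessors; the overloads without a vertex index | ||
| | // default to vertex 0. | ||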
| 290 | F32 IREmitter::GetAttribute(IR::Attribute attribute) { | ||
| 291 | return GetAttribute(attribute, Imm32(0)); | ||
| 292 | } | ||
| 293 | |||
| 294 | F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | ||
| 295 | return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | ||
| 296 | } | ||
| 297 | |||
| 298 | void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | ||
| 299 | Inst(Opcode::SetAttribute, attribute, value, vertex); | ||
| 300 | } | ||
| 301 | |||
| 302 | F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { | ||
| 303 | return GetAttributeIndexed(phys_address, Imm32(0)); | ||
| 304 | } | ||
| 305 | |||
| 306 | F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { | ||
| 307 | return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); | ||
| 308 | } | ||
| 309 | |||
| 310 | void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { | ||
| 311 | Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); | ||
| 312 | } | ||
| 313 | |||
| 314 | F32 IREmitter::GetPatch(Patch patch) { | ||
| 315 | return Inst<F32>(Opcode::GetPatch, patch); | ||
| 316 | } | ||
| 317 | |||
| 318 | void IREmitter::SetPatch(Patch patch, const F32& value) { | ||
| 319 | Inst(Opcode::SetPatch, patch, value); | ||
| 320 | } | ||
| 321 | |||
| 322 | void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { | ||
| 323 | Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value); | ||
| 324 | } | ||
| 325 | |||
| 326 | void IREmitter::SetSampleMask(const U32& value) { | ||
| 327 | Inst(Opcode::SetSampleMask, value); | ||
| 328 | } | ||
| 329 | |||
| 330 | void IREmitter::SetFragDepth(const F32& value) { | ||
| 331 | Inst(Opcode::SetFragDepth, value); | ||
| 332 | } | ||
| 333 | |||
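| | // Compute and per-invocation built-ins. WorkgroupId and LocalInvocationId are | ||
| | // three-component composites; the scalar helpers extract a single component. | ||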
| 334 | U32 IREmitter::WorkgroupIdX() { | ||
| 335 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)}; | ||
| 336 | } | ||
| 337 | |||
| 338 | U32 IREmitter::WorkgroupIdY() { | ||
| 339 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)}; | ||
| 340 | } | ||
| 341 | |||
| 342 | U32 IREmitter::WorkgroupIdZ() { | ||
| 343 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; | ||
| 344 | } | ||
| 345 | |||
| 346 | Value IREmitter::LocalInvocationId() { | ||
| 347 | return Inst(Opcode::LocalInvocationId); | ||
| 348 | } | ||
| 349 | |||
| 350 | U32 IREmitter::LocalInvocationIdX() { | ||
| 351 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; | ||
| 352 | } | ||
| 353 | |||
| 354 | U32 IREmitter::LocalInvocationIdY() { | ||
| 355 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)}; | ||
| 356 | } | ||
| 357 | |||
| 358 | U32 IREmitter::LocalInvocationIdZ() { | ||
| 359 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)}; | ||
| 360 | } | ||
| 361 | |||
| 362 | U32 IREmitter::InvocationId() { | ||
| 363 | return Inst<U32>(Opcode::InvocationId); | ||
| 364 | } | ||
| 365 | |||
| 366 | U32 IREmitter::SampleId() { | ||
| 367 | return Inst<U32>(Opcode::SampleId); | ||
| 368 | } | ||
| 369 | |||
| 370 | U1 IREmitter::IsHelperInvocation() { | ||
| 371 | return Inst<U1>(Opcode::IsHelperInvocation); | ||
| 372 | } | ||
| 373 | |||
| 374 | F32 IREmitter::YDirection() { | ||
| 375 | return Inst<F32>(Opcode::YDirection); | ||
| 376 | } | ||
| 377 | |||
| 378 | U32 IREmitter::LaneId() { | ||
| 379 | return Inst<U32>(Opcode::LaneId); | ||
| 380 | } | ||
| 381 | |||
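| | // Global memory accesses. Sub-word loads widen to a 32-bit result; 64- and | ||
| | // 128-bit accesses use vector values. | ||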
| 382 | U32 IREmitter::LoadGlobalU8(const U64& address) { | ||
| 383 | return Inst<U32>(Opcode::LoadGlobalU8, address); | ||
| 384 | } | ||
| 385 | |||
| 386 | U32 IREmitter::LoadGlobalS8(const U64& address) { | ||
| 387 | return Inst<U32>(Opcode::LoadGlobalS8, address); | ||
| 388 | } | ||
| 389 | |||
| 390 | U32 IREmitter::LoadGlobalU16(const U64& address) { | ||
| 391 | return Inst<U32>(Opcode::LoadGlobalU16, address); | ||
| 392 | } | ||
| 393 | |||
| 394 | U32 IREmitter::LoadGlobalS16(const U64& address) { | ||
| 395 | return Inst<U32>(Opcode::LoadGlobalS16, address); | ||
| 396 | } | ||
| 397 | |||
| 398 | U32 IREmitter::LoadGlobal32(const U64& address) { | ||
| 399 | return Inst<U32>(Opcode::LoadGlobal32, address); | ||
| 400 | } | ||
| 401 | |||
| 402 | Value IREmitter::LoadGlobal64(const U64& address) { | ||
| 403 | return Inst<Value>(Opcode::LoadGlobal64, address); | ||
| 404 | } | ||
| 405 | |||
| 406 | Value IREmitter::LoadGlobal128(const U64& address) { | ||
| 407 | return Inst<Value>(Opcode::LoadGlobal128, address); | ||
| 408 | } | ||
| 409 | |||
| 410 | void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { | ||
| 411 | Inst(Opcode::WriteGlobalU8, address, value); | ||
| 412 | } | ||
| 413 | |||
| 414 | void IREmitter::WriteGlobalS8(const U64& address, const U32& value) { | ||
| 415 | Inst(Opcode::WriteGlobalS8, address, value); | ||
| 416 | } | ||
| 417 | |||
| 418 | void IREmitter::WriteGlobalU16(const U64& address, const U32& value) { | ||
| 419 | Inst(Opcode::WriteGlobalU16, address, value); | ||
| 420 | } | ||
| 421 | |||
| 422 | void IREmitter::WriteGlobalS16(const U64& address, const U32& value) { | ||
| 423 | Inst(Opcode::WriteGlobalS16, address, value); | ||
| 424 | } | ||
| 425 | |||
| 426 | void IREmitter::WriteGlobal32(const U64& address, const U32& value) { | ||
| 427 | Inst(Opcode::WriteGlobal32, address, value); | ||
| 428 | } | ||
| 429 | |||
| 430 | void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) { | ||
| 431 | Inst(Opcode::WriteGlobal64, address, vector); | ||
| 432 | } | ||
| 433 | |||
| 434 | void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) { | ||
| 435 | Inst(Opcode::WriteGlobal128, address, vector); | ||
| 436 | } | ||
| 437 | |||
| 438 | U32 IREmitter::LoadLocal(const IR::U32& word_offset) { | ||
| 439 | return Inst<U32>(Opcode::LoadLocal, word_offset); | ||
| 440 | } | ||
| 441 | |||
| 442 | void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) { | ||
| 443 | Inst(Opcode::WriteLocal, word_offset, value); | ||
| 444 | } | ||
| 445 | |||
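| | // Shared memory accesses dispatch on bit width; sub-word loads pick a signed | ||
| | // or unsigned opcode, while stores need no signedness distinction. | ||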
| 446 | Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) { | ||
| 447 | switch (bit_size) { | ||
| 448 | case 8: | ||
| 449 | return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset); | ||
| 450 | case 16: | ||
| 451 | return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset); | ||
| 452 | case 32: | ||
| 453 | return Inst(Opcode::LoadSharedU32, offset); | ||
| 454 | case 64: | ||
| 455 | return Inst(Opcode::LoadSharedU64, offset); | ||
| 456 | case 128: | ||
| 457 | return Inst(Opcode::LoadSharedU128, offset); | ||
| 458 | } | ||
| 459 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 460 | } | ||
| 461 | |||
| 462 | void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) { | ||
| 463 | switch (bit_size) { | ||
| 464 | case 8: | ||
| 465 | Inst(Opcode::WriteSharedU8, offset, value); | ||
| 466 | break; | ||
| 467 | case 16: | ||
| 468 | Inst(Opcode::WriteSharedU16, offset, value); | ||
| 469 | break; | ||
| 470 | case 32: | ||
| 471 | Inst(Opcode::WriteSharedU32, offset, value); | ||
| 472 | break; | ||
| 473 | case 64: | ||
| 474 | Inst(Opcode::WriteSharedU64, offset, value); | ||
| 475 | break; | ||
| 476 | case 128: | ||
| 477 | Inst(Opcode::WriteSharedU128, offset, value); | ||
| 478 | break; | ||
| 479 | default: | ||
| 480 | throw InvalidArgument("Invalid bit size {}", bit_size); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | |||
| 484 | U1 IREmitter::GetZeroFromOp(const Value& op) { | ||
| 485 | return Inst<U1>(Opcode::GetZeroFromOp, op); | ||
| 486 | } | ||
| 487 | |||
| 488 | U1 IREmitter::GetSignFromOp(const Value& op) { | ||
| 489 | return Inst<U1>(Opcode::GetSignFromOp, op); | ||
| 490 | } | ||
| 491 | |||
| 492 | U1 IREmitter::GetCarryFromOp(const Value& op) { | ||
| 493 | return Inst<U1>(Opcode::GetCarryFromOp, op); | ||
| 494 | } | ||
| 495 | |||
| 496 | U1 IREmitter::GetOverflowFromOp(const Value& op) { | ||
| 497 | return Inst<U1>(Opcode::GetOverflowFromOp, op); | ||
| 498 | } | ||
| 499 | |||
| 500 | U1 IREmitter::GetSparseFromOp(const Value& op) { | ||
| 501 | return Inst<U1>(Opcode::GetSparseFromOp, op); | ||
| 502 | } | ||
| 503 | |||
| 504 | U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||
| 505 | return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||
| 506 | } | ||
| 507 | |||
| 508 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||
| 509 | if (a.Type() != b.Type()) { | ||
| 510 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 511 | } | ||
| 512 | switch (a.Type()) { | ||
| 513 | case Type::F16: | ||
| 514 | return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b); | ||
| 515 | case Type::F32: | ||
| 516 | return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b); | ||
| 517 | case Type::F64: | ||
| 518 | return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b); | ||
| 519 | default: | ||
| 520 | ThrowInvalidType(a.Type()); | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
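| | // Composite construction, extraction and insertion dispatch on element type | ||
| | // and vector width; out-of-bounds elements and mismatched types throw. | ||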
| 524 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { | ||
| 525 | if (e1.Type() != e2.Type()) { | ||
| 526 | throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); | ||
| 527 | } | ||
| 528 | switch (e1.Type()) { | ||
| 529 | case Type::U32: | ||
| 530 | return Inst(Opcode::CompositeConstructU32x2, e1, e2); | ||
| 531 | case Type::F16: | ||
| 532 | return Inst(Opcode::CompositeConstructF16x2, e1, e2); | ||
| 533 | case Type::F32: | ||
| 534 | return Inst(Opcode::CompositeConstructF32x2, e1, e2); | ||
| 535 | case Type::F64: | ||
| 536 | return Inst(Opcode::CompositeConstructF64x2, e1, e2); | ||
| 537 | default: | ||
| 538 | ThrowInvalidType(e1.Type()); | ||
| 539 | } | ||
| 540 | } | ||
| 541 | |||
| 542 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) { | ||
| 543 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { | ||
| 544 | throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); | ||
| 545 | } | ||
| 546 | switch (e1.Type()) { | ||
| 547 | case Type::U32: | ||
| 548 | return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3); | ||
| 549 | case Type::F16: | ||
| 550 | return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3); | ||
| 551 | case Type::F32: | ||
| 552 | return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3); | ||
| 553 | case Type::F64: | ||
| 554 | return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3); | ||
| 555 | default: | ||
| 556 | ThrowInvalidType(e1.Type()); | ||
| 557 | } | ||
| 558 | } | ||
| 559 | |||
| 560 | Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | ||
| 561 | const Value& e4) { | ||
| 562 | if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { | ||
| 563 | throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), | ||
| 564 | e3.Type(), e4.Type()); | ||
| 565 | } | ||
| 566 | switch (e1.Type()) { | ||
| 567 | case Type::U32: | ||
| 568 | return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); | ||
| 569 | case Type::F16: | ||
| 570 | return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4); | ||
| 571 | case Type::F32: | ||
| 572 | return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); | ||
| 573 | case Type::F64: | ||
| 574 | return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); | ||
| 575 | default: | ||
| 576 | ThrowInvalidType(e1.Type()); | ||
| 577 | } | ||
| 578 | } | ||
| 579 | |||
| 580 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | ||
| 581 | const auto read{[&](Opcode opcode, size_t limit) -> Value { | ||
| 582 | if (element >= limit) { | ||
| 583 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 584 | } | ||
| 585 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); | ||
| 586 | }}; | ||
| 587 | switch (vector.Type()) { | ||
| 588 | case Type::U32x2: | ||
| 589 | return read(Opcode::CompositeExtractU32x2, 2); | ||
| 590 | case Type::U32x3: | ||
| 591 | return read(Opcode::CompositeExtractU32x3, 3); | ||
| 592 | case Type::U32x4: | ||
| 593 | return read(Opcode::CompositeExtractU32x4, 4); | ||
| 594 | case Type::F16x2: | ||
| 595 | return read(Opcode::CompositeExtractF16x2, 2); | ||
| 596 | case Type::F16x3: | ||
| 597 | return read(Opcode::CompositeExtractF16x3, 3); | ||
| 598 | case Type::F16x4: | ||
| 599 | return read(Opcode::CompositeExtractF16x4, 4); | ||
| 600 | case Type::F32x2: | ||
| 601 | return read(Opcode::CompositeExtractF32x2, 2); | ||
| 602 | case Type::F32x3: | ||
| 603 | return read(Opcode::CompositeExtractF32x3, 3); | ||
| 604 | case Type::F32x4: | ||
| 605 | return read(Opcode::CompositeExtractF32x4, 4); | ||
| 606 | case Type::F64x2: | ||
| 607 | return read(Opcode::CompositeExtractF64x2, 2); | ||
| 608 | case Type::F64x3: | ||
| 609 | return read(Opcode::CompositeExtractF64x3, 3); | ||
| 610 | case Type::F64x4: | ||
| 611 | return read(Opcode::CompositeExtractF64x4, 4); | ||
| 612 | default: | ||
| 613 | ThrowInvalidType(vector.Type()); | ||
| 614 | } | ||
| 615 | } | ||
| 616 | |||
| 617 | Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { | ||
| 618 | const auto insert{[&](Opcode opcode, size_t limit) { | ||
| 619 | if (element >= limit) { | ||
| 620 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 621 | } | ||
| 622 | return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); | ||
| 623 | }}; | ||
| 624 | switch (vector.Type()) { | ||
| 625 | case Type::U32x2: | ||
| 626 | return insert(Opcode::CompositeInsertU32x2, 2); | ||
| 627 | case Type::U32x3: | ||
| 628 | return insert(Opcode::CompositeInsertU32x3, 3); | ||
| 629 | case Type::U32x4: | ||
| 630 | return insert(Opcode::CompositeInsertU32x4, 4); | ||
| 631 | case Type::F16x2: | ||
| 632 | return insert(Opcode::CompositeInsertF16x2, 2); | ||
| 633 | case Type::F16x3: | ||
| 634 | return insert(Opcode::CompositeInsertF16x3, 3); | ||
| 635 | case Type::F16x4: | ||
| 636 | return insert(Opcode::CompositeInsertF16x4, 4); | ||
| 637 | case Type::F32x2: | ||
| 638 | return insert(Opcode::CompositeInsertF32x2, 2); | ||
| 639 | case Type::F32x3: | ||
| 640 | return insert(Opcode::CompositeInsertF32x3, 3); | ||
| 641 | case Type::F32x4: | ||
| 642 | return insert(Opcode::CompositeInsertF32x4, 4); | ||
| 643 | case Type::F64x2: | ||
| 644 | return insert(Opcode::CompositeInsertF64x2, 2); | ||
| 645 | case Type::F64x3: | ||
| 646 | return insert(Opcode::CompositeInsertF64x3, 3); | ||
| 647 | case Type::F64x4: | ||
| 648 | return insert(Opcode::CompositeInsertF64x4, 4); | ||
| 649 | default: | ||
| 650 | ThrowInvalidType(vector.Type()); | ||
| 651 | } | ||
| 652 | } | ||
| 653 | |||
| 654 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { | ||
| 655 | if (true_value.Type() != false_value.Type()) { | ||
| 656 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | ||
| 657 | } | ||
| 658 | switch (true_value.Type()) { | ||
| 659 | case Type::U1: | ||
| 660 | return Inst(Opcode::SelectU1, condition, true_value, false_value); | ||
| 661 | case Type::U8: | ||
| 662 | return Inst(Opcode::SelectU8, condition, true_value, false_value); | ||
| 663 | case Type::U16: | ||
| 664 | return Inst(Opcode::SelectU16, condition, true_value, false_value); | ||
| 665 | case Type::U32: | ||
| 666 | return Inst(Opcode::SelectU32, condition, true_value, false_value); | ||
| 667 | case Type::U64: | ||
| 668 | return Inst(Opcode::SelectU64, condition, true_value, false_value); | ||
| 669 | case Type::F32: | ||
| 670 | return Inst(Opcode::SelectF32, condition, true_value, false_value); | ||
| 671 | case Type::F64: | ||
| 672 | return Inst(Opcode::SelectF64, condition, true_value, false_value); | ||
| 673 | default: | ||
| 674 | throw InvalidArgument("Invalid type {}", true_value.Type()); | ||
| 675 | } | ||
| 676 | } | ||
| 677 | |||
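| | // BitCast specializations reinterpret between integer and floating-point | ||
| | // values of the same width. | ||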
| 678 | template <> | ||
| 679 | IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) { | ||
| 680 | return Inst<IR::U32>(Opcode::BitCastU32F32, value); | ||
| 681 | } | ||
| 682 | |||
| 683 | template <> | ||
| 684 | IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) { | ||
| 685 | return Inst<IR::F32>(Opcode::BitCastF32U32, value); | ||
| 686 | } | ||
| 687 | |||
| 688 | template <> | ||
| 689 | IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) { | ||
| 690 | return Inst<IR::U16>(Opcode::BitCastU16F16, value); | ||
| 691 | } | ||
| 692 | |||
| 693 | template <> | ||
| 694 | IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) { | ||
| 695 | return Inst<IR::F16>(Opcode::BitCastF16U16, value); | ||
| 696 | } | ||
| 697 | |||
| 698 | template <> | ||
| 699 | IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) { | ||
| 700 | return Inst<IR::U64>(Opcode::BitCastU64F64, value); | ||
| 701 | } | ||
| 702 | |||
| 703 | template <> | ||
| 704 | IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) { | ||
| 705 | return Inst<IR::F64>(Opcode::BitCastF64U64, value); | ||
| 706 | } | ||
| 707 | |||
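| | // Pack/Unpack pairs convert between a scalar and its two-component vector | ||
| | // representation. | ||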
| 708 | U64 IREmitter::PackUint2x32(const Value& vector) { | ||
| 709 | return Inst<U64>(Opcode::PackUint2x32, vector); | ||
| 710 | } | ||
| 711 | |||
| 712 | Value IREmitter::UnpackUint2x32(const U64& value) { | ||
| 713 | return Inst<Value>(Opcode::UnpackUint2x32, value); | ||
| 714 | } | ||
| 715 | |||
| 716 | U32 IREmitter::PackFloat2x16(const Value& vector) { | ||
| 717 | return Inst<U32>(Opcode::PackFloat2x16, vector); | ||
| 718 | } | ||
| 719 | |||
| 720 | Value IREmitter::UnpackFloat2x16(const U32& value) { | ||
| 721 | return Inst(Opcode::UnpackFloat2x16, value); | ||
| 722 | } | ||
| 723 | |||
| 724 | U32 IREmitter::PackHalf2x16(const Value& vector) { | ||
| 725 | return Inst<U32>(Opcode::PackHalf2x16, vector); | ||
| 726 | } | ||
| 727 | |||
| 728 | Value IREmitter::UnpackHalf2x16(const U32& value) { | ||
| 729 | return Inst(Opcode::UnpackHalf2x16, value); | ||
| 730 | } | ||
| 731 | |||
| 732 | F64 IREmitter::PackDouble2x32(const Value& vector) { | ||
| 733 | return Inst<F64>(Opcode::PackDouble2x32, vector); | ||
| 734 | } | ||
| 735 | |||
| 736 | Value IREmitter::UnpackDouble2x32(const F64& value) { | ||
| 737 | return Inst<Value>(Opcode::UnpackDouble2x32, value); | ||
| 738 | } | ||
| 739 | |||
| 740 | F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||
| 741 | if (a.Type() != b.Type()) { | ||
| 742 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 743 | } | ||
| 744 | switch (a.Type()) { | ||
| 745 | case Type::F16: | ||
| 746 | return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b); | ||
| 747 | case Type::F32: | ||
| 748 | return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b); | ||
| 749 | case Type::F64: | ||
| 750 | return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b); | ||
| 751 | default: | ||
| 752 | ThrowInvalidType(a.Type()); | ||
| 753 | } | ||
| 754 | } | ||
| 755 | |||
| 756 | F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, | ||
| 757 | FpControl control) { | ||
| 758 | if (a.Type() != b.Type() || a.Type() != c.Type()) { | ||
| 759 | throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); | ||
| 760 | } | ||
| 761 | switch (a.Type()) { | ||
| 762 | case Type::F16: | ||
| 763 | return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c); | ||
| 764 | case Type::F32: | ||
| 765 | return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c); | ||
| 766 | case Type::F64: | ||
| 767 | return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c); | ||
| 768 | default: | ||
| 769 | ThrowInvalidType(a.Type()); | ||
| 770 | } | ||
| 771 | } | ||
| 772 | |||
| 773 | F16F32F64 IREmitter::FPAbs(const F16F32F64& value) { | ||
| 774 | switch (value.Type()) { | ||
| 775 | case Type::F16: | ||
| 776 | return Inst<F16>(Opcode::FPAbs16, value); | ||
| 777 | case Type::F32: | ||
| 778 | return Inst<F32>(Opcode::FPAbs32, value); | ||
| 779 | case Type::F64: | ||
| 780 | return Inst<F64>(Opcode::FPAbs64, value); | ||
| 781 | default: | ||
| 782 | ThrowInvalidType(value.Type()); | ||
| 783 | } | ||
| 784 | } | ||
| 785 | |||
| 786 | F16F32F64 IREmitter::FPNeg(const F16F32F64& value) { | ||
| 787 | switch (value.Type()) { | ||
| 788 | case Type::F16: | ||
| 789 | return Inst<F16>(Opcode::FPNeg16, value); | ||
| 790 | case Type::F32: | ||
| 791 | return Inst<F32>(Opcode::FPNeg32, value); | ||
| 792 | case Type::F64: | ||
| 793 | return Inst<F64>(Opcode::FPNeg64, value); | ||
| 794 | default: | ||
| 795 | ThrowInvalidType(value.Type()); | ||
| 796 | } | ||
| 797 | } | ||
| 798 | |||
| 799 | F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) { | ||
| 800 | F16F32F64 result{value}; | ||
| 801 | if (abs) { | ||
| 802 | result = FPAbs(result); | ||
| 803 | } | ||
| 804 | if (neg) { | ||
| 805 | result = FPNeg(result); | ||
| 806 | } | ||
| 807 | return result; | ||
| 808 | } | ||
| 809 | |||
| 810 | F32 IREmitter::FPCos(const F32& value) { | ||
| 811 | return Inst<F32>(Opcode::FPCos, value); | ||
| 812 | } | ||
| 813 | |||
| 814 | F32 IREmitter::FPSin(const F32& value) { | ||
| 815 | return Inst<F32>(Opcode::FPSin, value); | ||
| 816 | } | ||
| 817 | |||
| 818 | F32 IREmitter::FPExp2(const F32& value) { | ||
| 819 | return Inst<F32>(Opcode::FPExp2, value); | ||
| 820 | } | ||
| 821 | |||
| 822 | F32 IREmitter::FPLog2(const F32& value) { | ||
| 823 | return Inst<F32>(Opcode::FPLog2, value); | ||
| 824 | } | ||
| 825 | |||
| 826 | F32F64 IREmitter::FPRecip(const F32F64& value) { | ||
| 827 | switch (value.Type()) { | ||
| 828 | case Type::F32: | ||
| 829 | return Inst<F32>(Opcode::FPRecip32, value); | ||
| 830 | case Type::F64: | ||
| 831 | return Inst<F64>(Opcode::FPRecip64, value); | ||
| 832 | default: | ||
| 833 | ThrowInvalidType(value.Type()); | ||
| 834 | } | ||
| 835 | } | ||
| 836 | |||
| 837 | F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { | ||
| 838 | switch (value.Type()) { | ||
| 839 | case Type::F32: | ||
| 840 | return Inst<F32>(Opcode::FPRecipSqrt32, value); | ||
| 841 | case Type::F64: | ||
| 842 | return Inst<F64>(Opcode::FPRecipSqrt64, value); | ||
| 843 | default: | ||
| 844 | ThrowInvalidType(value.Type()); | ||
| 845 | } | ||
| 846 | } | ||
| 847 | |||
| 848 | F32 IREmitter::FPSqrt(const F32& value) { | ||
| 849 | return Inst<F32>(Opcode::FPSqrt, value); | ||
| 850 | } | ||
| 851 | |||
| 852 | F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { | ||
| 853 | switch (value.Type()) { | ||
| 854 | case Type::F16: | ||
| 855 | return Inst<F16>(Opcode::FPSaturate16, value); | ||
| 856 | case Type::F32: | ||
| 857 | return Inst<F32>(Opcode::FPSaturate32, value); | ||
| 858 | case Type::F64: | ||
| 859 | return Inst<F64>(Opcode::FPSaturate64, value); | ||
| 860 | default: | ||
| 861 | ThrowInvalidType(value.Type()); | ||
| 862 | } | ||
| 863 | } | ||
| 864 | |||
| 865 | F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||
| 866 | const F16F32F64& max_value) { | ||
| 867 | if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) { | ||
| 868 | throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(), | ||
| 869 | max_value.Type()); | ||
| 870 | } | ||
| 871 | switch (value.Type()) { | ||
| 872 | case Type::F16: | ||
| 873 | return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value); | ||
| 874 | case Type::F32: | ||
| 875 | return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value); | ||
| 876 | case Type::F64: | ||
| 877 | return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value); | ||
| 878 | default: | ||
| 879 | ThrowInvalidType(value.Type()); | ||
| 880 | } | ||
| 881 | } | ||
| 882 | |||
| 883 | F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { | ||
| 884 | switch (value.Type()) { | ||
| 885 | case Type::F16: | ||
| 886 | return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); | ||
| 887 | case Type::F32: | ||
| 888 | return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); | ||
| 889 | case Type::F64: | ||
| 890 | return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); | ||
| 891 | default: | ||
| 892 | ThrowInvalidType(value.Type()); | ||
| 893 | } | ||
| 894 | } | ||
| 895 | |||
| 896 | F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { | ||
| 897 | switch (value.Type()) { | ||
| 898 | case Type::F16: | ||
| 899 | return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); | ||
| 900 | case Type::F32: | ||
| 901 | return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); | ||
| 902 | case Type::F64: | ||
| 903 | return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); | ||
| 904 | default: | ||
| 905 | ThrowInvalidType(value.Type()); | ||
| 906 | } | ||
| 907 | } | ||
| 908 | |||
| 909 | F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { | ||
| 910 | switch (value.Type()) { | ||
| 911 | case Type::F16: | ||
| 912 | return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); | ||
| 913 | case Type::F32: | ||
| 914 | return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); | ||
| 915 | case Type::F64: | ||
| 916 | return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); | ||
| 917 | default: | ||
| 918 | ThrowInvalidType(value.Type()); | ||
| 919 | } | ||
| 920 | } | ||
| 921 | |||
| 922 | F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { | ||
| 923 | switch (value.Type()) { | ||
| 924 | case Type::F16: | ||
| 925 | return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); | ||
| 926 | case Type::F32: | ||
| 927 | return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); | ||
| 928 | case Type::F64: | ||
| 929 | return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); | ||
| 930 | default: | ||
| 931 | ThrowInvalidType(value.Type()); | ||
| 932 | } | ||
| 933 | } | ||
| 934 | |||
| 935 | U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) { | ||
| 936 | if (lhs.Type() != rhs.Type()) { | ||
| 937 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 938 | } | ||
| 939 | switch (lhs.Type()) { | ||
| 940 | case Type::F16: | ||
| 941 | return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control}, | ||
| 942 | lhs, rhs); | ||
| 943 | case Type::F32: | ||
| 944 | return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control}, | ||
| 945 | lhs, rhs); | ||
| 946 | case Type::F64: | ||
| 947 | return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control}, | ||
| 948 | lhs, rhs); | ||
| 949 | default: | ||
| 950 | ThrowInvalidType(lhs.Type()); | ||
| 951 | } | ||
| 952 | } | ||
| 953 | |||
| 954 | U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 955 | bool ordered) { | ||
| 956 | if (lhs.Type() != rhs.Type()) { | ||
| 957 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 958 | } | ||
| 959 | switch (lhs.Type()) { | ||
| 960 | case Type::F16: | ||
| 961 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, | ||
| 962 | Flags{control}, lhs, rhs); | ||
| 963 | case Type::F32: | ||
| 964 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, | ||
| 965 | Flags{control}, lhs, rhs); | ||
| 966 | case Type::F64: | ||
| 967 | return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, | ||
| 968 | Flags{control}, lhs, rhs); | ||
| 969 | default: | ||
| 970 | ThrowInvalidType(lhs.Type()); | ||
| 971 | } | ||
| 972 | } | ||
| 973 | |||
| 974 | U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 975 | bool ordered) { | ||
| 976 | if (lhs.Type() != rhs.Type()) { | ||
| 977 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 978 | } | ||
| 979 | switch (lhs.Type()) { | ||
| 980 | case Type::F16: | ||
| 981 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, | ||
| 982 | Flags{control}, lhs, rhs); | ||
| 983 | case Type::F32: | ||
| 984 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, | ||
| 985 | Flags{control}, lhs, rhs); | ||
| 986 | case Type::F64: | ||
| 987 | return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, | ||
| 988 | Flags{control}, lhs, rhs); | ||
| 989 | default: | ||
| 990 | ThrowInvalidType(lhs.Type()); | ||
| 991 | } | ||
| 992 | } | ||
| 993 | |||
| 994 | U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 995 | bool ordered) { | ||
| 996 | if (lhs.Type() != rhs.Type()) { | ||
| 997 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 998 | } | ||
| 999 | switch (lhs.Type()) { | ||
| 1000 | case Type::F16: | ||
| 1001 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, | ||
| 1002 | Flags{control}, lhs, rhs); | ||
| 1003 | case Type::F32: | ||
| 1004 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, | ||
| 1005 | Flags{control}, lhs, rhs); | ||
| 1006 | case Type::F64: | ||
| 1007 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, | ||
| 1008 | Flags{control}, lhs, rhs); | ||
| 1009 | default: | ||
| 1010 | ThrowInvalidType(lhs.Type()); | ||
| 1011 | } | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 1015 | bool ordered) { | ||
| 1016 | if (lhs.Type() != rhs.Type()) { | ||
| 1017 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1018 | } | ||
| 1019 | switch (lhs.Type()) { | ||
| 1020 | case Type::F16: | ||
| 1021 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16, | ||
| 1022 | Flags{control}, lhs, rhs); | ||
| 1023 | case Type::F32: | ||
| 1024 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32, | ||
| 1025 | Flags{control}, lhs, rhs); | ||
| 1026 | case Type::F64: | ||
| 1027 | return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64, | ||
| 1028 | Flags{control}, lhs, rhs); | ||
| 1029 | default: | ||
| 1030 | ThrowInvalidType(lhs.Type()); | ||
| 1031 | } | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, | ||
| 1035 | bool ordered) { | ||
| 1036 | if (lhs.Type() != rhs.Type()) { | ||
| 1037 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1038 | } | ||
| 1039 | switch (lhs.Type()) { | ||
| 1040 | case Type::F16: | ||
| 1041 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16 | ||
| 1042 | : Opcode::FPUnordGreaterThanEqual16, | ||
| 1043 | Flags{control}, lhs, rhs); | ||
| 1044 | case Type::F32: | ||
| 1045 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32 | ||
| 1046 | : Opcode::FPUnordGreaterThanEqual32, | ||
| 1047 | Flags{control}, lhs, rhs); | ||
| 1048 | case Type::F64: | ||
| 1049 | return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64 | ||
| 1050 | : Opcode::FPUnordGreaterThanEqual64, | ||
| 1051 | Flags{control}, lhs, rhs); | ||
| 1052 | default: | ||
| 1053 | ThrowInvalidType(lhs.Type()); | ||
| 1054 | } | ||
| 1055 | } | ||
| 1056 | |||
| 1057 | U1 IREmitter::FPIsNan(const F16F32F64& value) { | ||
| 1058 | switch (value.Type()) { | ||
| 1059 | case Type::F16: | ||
| 1060 | return Inst<U1>(Opcode::FPIsNan16, value); | ||
| 1061 | case Type::F32: | ||
| 1062 | return Inst<U1>(Opcode::FPIsNan32, value); | ||
| 1063 | case Type::F64: | ||
| 1064 | return Inst<U1>(Opcode::FPIsNan64, value); | ||
| 1065 | default: | ||
| 1066 | ThrowInvalidType(value.Type()); | ||
| 1067 | } | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) { | ||
| 1071 | if (lhs.Type() != rhs.Type()) { | ||
| 1072 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1073 | } | ||
| 1074 | return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs))); | ||
| 1075 | } | ||
| 1076 | |||
| 1077 | U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) { | ||
| 1078 | if (lhs.Type() != rhs.Type()) { | ||
| 1079 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1080 | } | ||
| 1081 | return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) { | ||
| 1085 | if (lhs.Type() != rhs.Type()) { | ||
| 1086 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1087 | } | ||
| 1088 | switch (lhs.Type()) { | ||
| 1089 | case Type::F32: | ||
| 1090 | return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs); | ||
| 1091 | case Type::F64: | ||
| 1092 | return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs); | ||
| 1093 | default: | ||
| 1094 | ThrowInvalidType(lhs.Type()); | ||
| 1095 | } | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) { | ||
| 1099 | if (lhs.Type() != rhs.Type()) { | ||
| 1100 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1101 | } | ||
| 1102 | switch (lhs.Type()) { | ||
| 1103 | case Type::F32: | ||
| 1104 | return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs); | ||
| 1105 | case Type::F64: | ||
| 1106 | return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs); | ||
| 1107 | default: | ||
| 1108 | ThrowInvalidType(lhs.Type()); | ||
| 1109 | } | ||
| 1110 | } | ||
| 1111 | |||
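| | // Integer arithmetic. Two-operand ops dispatch on 32- or 64-bit operand | ||
| | // type; mismatched operand types throw. | ||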
| 1112 | U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { | ||
| 1113 | if (a.Type() != b.Type()) { | ||
| 1114 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 1115 | } | ||
| 1116 | switch (a.Type()) { | ||
| 1117 | case Type::U32: | ||
| 1118 | return Inst<U32>(Opcode::IAdd32, a, b); | ||
| 1119 | case Type::U64: | ||
| 1120 | return Inst<U64>(Opcode::IAdd64, a, b); | ||
| 1121 | default: | ||
| 1122 | ThrowInvalidType(a.Type()); | ||
| 1123 | } | ||
| 1124 | } | ||
| 1125 | |||
| 1126 | U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) { | ||
| 1127 | if (a.Type() != b.Type()) { | ||
| 1128 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||
| 1129 | } | ||
| 1130 | switch (a.Type()) { | ||
| 1131 | case Type::U32: | ||
| 1132 | return Inst<U32>(Opcode::ISub32, a, b); | ||
| 1133 | case Type::U64: | ||
| 1134 | return Inst<U64>(Opcode::ISub64, a, b); | ||
| 1135 | default: | ||
| 1136 | ThrowInvalidType(a.Type()); | ||
| 1137 | } | ||
| 1138 | } | ||
| 1139 | |||
| 1140 | U32 IREmitter::IMul(const U32& a, const U32& b) { | ||
| 1141 | return Inst<U32>(Opcode::IMul32, a, b); | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | U32U64 IREmitter::INeg(const U32U64& value) { | ||
| 1145 | switch (value.Type()) { | ||
| 1146 | case Type::U32: | ||
| 1147 | return Inst<U32>(Opcode::INeg32, value); | ||
| 1148 | case Type::U64: | ||
| 1149 | return Inst<U64>(Opcode::INeg64, value); | ||
| 1150 | default: | ||
| 1151 | ThrowInvalidType(value.Type()); | ||
| 1152 | } | ||
| 1153 | } | ||
| 1154 | |||
| 1155 | U32 IREmitter::IAbs(const U32& value) { | ||
| 1156 | return Inst<U32>(Opcode::IAbs32, value); | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) { | ||
| 1160 | switch (base.Type()) { | ||
| 1161 | case Type::U32: | ||
| 1162 | return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); | ||
| 1163 | case Type::U64: | ||
| 1164 | return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift); | ||
| 1165 | default: | ||
| 1166 | ThrowInvalidType(base.Type()); | ||
| 1167 | } | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) { | ||
| 1171 | switch (base.Type()) { | ||
| 1172 | case Type::U32: | ||
| 1173 | return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); | ||
| 1174 | case Type::U64: | ||
| 1175 | return Inst<U64>(Opcode::ShiftRightLogical64, base, shift); | ||
| 1176 | default: | ||
| 1177 | ThrowInvalidType(base.Type()); | ||
| 1178 | } | ||
| 1179 | } | ||
| 1180 | |||
| 1181 | U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) { | ||
| 1182 | switch (base.Type()) { | ||
| 1183 | case Type::U32: | ||
| 1184 | return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); | ||
| 1185 | case Type::U64: | ||
| 1186 | return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift); | ||
| 1187 | default: | ||
| 1188 | ThrowInvalidType(base.Type()); | ||
| 1189 | } | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { | ||
| 1193 | return Inst<U32>(Opcode::BitwiseAnd32, a, b); | ||
| 1194 | } | ||
| 1195 | |||
| 1196 | U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { | ||
| 1197 | return Inst<U32>(Opcode::BitwiseOr32, a, b); | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { | ||
| 1201 | return Inst<U32>(Opcode::BitwiseXor32, a, b); | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 1205 | const U32& count) { | ||
| 1206 | return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 1210 | bool is_signed) { | ||
| 1211 | return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, | ||
| 1212 | count); | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | U32 IREmitter::BitReverse(const U32& value) { | ||
| 1216 | return Inst<U32>(Opcode::BitReverse32, value); | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | U32 IREmitter::BitCount(const U32& value) { | ||
| 1220 | return Inst<U32>(Opcode::BitCount32, value); | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | U32 IREmitter::BitwiseNot(const U32& value) { | ||
| 1224 | return Inst<U32>(Opcode::BitwiseNot32, value); | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | U32 IREmitter::FindSMsb(const U32& value) { | ||
| 1228 | return Inst<U32>(Opcode::FindSMsb32, value); | ||
| 1229 | } | ||
| 1230 | |||
| 1231 | U32 IREmitter::FindUMsb(const U32& value) { | ||
| 1232 | return Inst<U32>(Opcode::FindUMsb32, value); | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | U32 IREmitter::SMin(const U32& a, const U32& b) { | ||
| 1236 | return Inst<U32>(Opcode::SMin32, a, b); | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | U32 IREmitter::UMin(const U32& a, const U32& b) { | ||
| 1240 | return Inst<U32>(Opcode::UMin32, a, b); | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) { | ||
| 1244 | return is_signed ? SMin(a, b) : UMin(a, b); | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | U32 IREmitter::SMax(const U32& a, const U32& b) { | ||
| 1248 | return Inst<U32>(Opcode::SMax32, a, b); | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | U32 IREmitter::UMax(const U32& a, const U32& b) { | ||
| 1252 | return Inst<U32>(Opcode::UMax32, a, b); | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) { | ||
| 1256 | return is_signed ? SMax(a, b) : UMax(a, b); | ||
| 1257 | } | ||
| 1258 | |||
| 1259 | U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) { | ||
| 1260 | return Inst<U32>(Opcode::SClamp32, value, min, max); | ||
| 1261 | } | ||
| 1262 | |||
| 1263 | U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { | ||
| 1264 | return Inst<U32>(Opcode::UClamp32, value, min, max); | ||
| 1265 | } | ||
| 1266 | |||
| 1267 | U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1268 | return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); | ||
| 1269 | } | ||
| 1270 | |||
| 1271 | U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { | ||
| 1272 | if (lhs.Type() != rhs.Type()) { | ||
| 1273 | throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type()); | ||
| 1274 | } | ||
| 1275 | switch (lhs.Type()) { | ||
| 1276 | case Type::U32: | ||
| 1277 | return Inst<U1>(Opcode::IEqual, lhs, rhs); | ||
| 1278 | case Type::U64: { | ||
| 1279 | // Manually compare the unpacked values | ||
| 1280 | const Value lhs_vector{UnpackUint2x32(lhs)}; | ||
| 1281 | const Value rhs_vector{UnpackUint2x32(rhs)}; | ||
| 1282 | return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)}, | ||
| 1283 | IR::U32{CompositeExtract(rhs_vector, 0)}), | ||
| 1284 | IEqual(IR::U32{CompositeExtract(lhs_vector, 1)}, | ||
| 1285 | IR::U32{CompositeExtract(rhs_vector, 1)})); | ||
| 1286 | } | ||
| 1287 | default: | ||
| 1288 | ThrowInvalidType(lhs.Type()); | ||
| 1289 | } | ||
| 1290 | } | ||
| 1291 | |||
| 1292 | U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1293 | return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1297 | return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { | ||
| 1301 | return Inst<U1>(Opcode::INotEqual, lhs, rhs); | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||
| 1305 | return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | ||
| 1306 | } | ||
| 1307 | |||
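| | // Shared memory atomics take a 32-bit offset; IMin/IMax forward to the | ||
| | // signed or unsigned opcode. | ||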
| 1308 | U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { | ||
| 1309 | return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { | ||
| 1313 | return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); | ||
| 1314 | } | ||
| 1315 | |||
| 1316 | U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { | ||
| 1317 | return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1321 | return is_signed ? SharedAtomicSMin(pointer_offset, value) | ||
| 1322 | : SharedAtomicUMin(pointer_offset, value); | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { | ||
| 1326 | return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { | ||
| 1330 | return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1334 | return is_signed ? SharedAtomicSMax(pointer_offset, value) | ||
| 1335 | : SharedAtomicUMax(pointer_offset, value); | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { | ||
| 1339 | return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { | ||
| 1343 | return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { | ||
| 1347 | return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { | ||
| 1351 | return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); | ||
| 1352 | } | ||
| 1353 | |||
| 1354 | U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { | ||
| 1355 | return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); | ||
| 1356 | } | ||
| 1357 | |||
| 1358 | U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { | ||
| 1359 | switch (value.Type()) { | ||
| 1360 | case Type::U32: | ||
| 1361 | return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); | ||
| 1362 | case Type::U64: | ||
| 1363 | return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); | ||
| 1364 | default: | ||
| 1365 | ThrowInvalidType(value.Type()); | ||
| 1366 | } | ||
| 1367 | } | ||
| 1368 | |||
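| | // Global atomics take a 64-bit address and dispatch on 32- or 64-bit | ||
| | // value type. | ||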
| 1369 | U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { | ||
| 1370 | switch (value.Type()) { | ||
| 1371 | case Type::U32: | ||
| 1372 | return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); | ||
| 1373 | case Type::U64: | ||
| 1374 | return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); | ||
| 1375 | default: | ||
| 1376 | ThrowInvalidType(value.Type()); | ||
| 1377 | } | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1381 | switch (value.Type()) { | ||
| 1382 | case Type::U32: | ||
| 1383 | return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); | ||
| 1384 | case Type::U64: | ||
| 1385 | return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); | ||
| 1386 | default: | ||
| 1387 | ThrowInvalidType(value.Type()); | ||
| 1388 | } | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1392 | switch (value.Type()) { | ||
| 1393 | case Type::U32: | ||
| 1394 | return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); | ||
| 1395 | case Type::U64: | ||
| 1396 | return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); | ||
| 1397 | default: | ||
| 1398 | ThrowInvalidType(value.Type()); | ||
| 1399 | } | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1403 | return is_signed ? GlobalAtomicSMin(pointer_offset, value) | ||
| 1404 | : GlobalAtomicUMin(pointer_offset, value); | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1408 | switch (value.Type()) { | ||
| 1409 | case Type::U32: | ||
| 1410 | return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); | ||
| 1411 | case Type::U64: | ||
| 1412 | return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); | ||
| 1413 | default: | ||
| 1414 | ThrowInvalidType(value.Type()); | ||
| 1415 | } | ||
| 1416 | } | ||
| 1417 | |||
| 1418 | U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1419 | switch (value.Type()) { | ||
| 1420 | case Type::U32: | ||
| 1421 | return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); | ||
| 1422 | case Type::U64: | ||
| 1423 | return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); | ||
| 1424 | default: | ||
| 1425 | ThrowInvalidType(value.Type()); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1430 | return is_signed ? GlobalAtomicSMax(pointer_offset, value) | ||
| 1431 | : GlobalAtomicUMax(pointer_offset, value); | ||
| 1432 | } | ||
| 1433 | |||
| 1434 | U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { | ||
| 1435 | return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); | ||
| 1436 | } | ||
| 1437 | |||
| 1438 | U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { | ||
| 1439 | return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); | ||
| 1440 | } | ||
| 1441 | |||
| 1442 | U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { | ||
| 1443 | switch (value.Type()) { | ||
| 1444 | case Type::U32: | ||
| 1445 | return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); | ||
| 1446 | case Type::U64: | ||
| 1447 | return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); | ||
| 1448 | default: | ||
| 1449 | ThrowInvalidType(value.Type()); | ||
| 1450 | } | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { | ||
| 1454 | switch (value.Type()) { | ||
| 1455 | case Type::U32: | ||
| 1456 | return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); | ||
| 1457 | case Type::U64: | ||
| 1458 | return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); | ||
| 1459 | default: | ||
| 1460 | ThrowInvalidType(value.Type()); | ||
| 1461 | } | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { | ||
| 1465 | switch (value.Type()) { | ||
| 1466 | case Type::U32: | ||
| 1467 | return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); | ||
| 1468 | case Type::U64: | ||
| 1469 | return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); | ||
| 1470 | default: | ||
| 1471 | ThrowInvalidType(value.Type()); | ||
| 1472 | } | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { | ||
| 1476 | switch (value.Type()) { | ||
| 1477 | case Type::U32: | ||
| 1478 | return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); | ||
| 1479 | case Type::U64: | ||
| 1480 | return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); | ||
| 1481 | default: | ||
| 1482 | ThrowInvalidType(value.Type()); | ||
| 1483 | } | ||
| 1484 | } | ||
| 1485 | |||
| 1486 | F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 1487 | const FpControl control) { | ||
| 1488 | return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 1492 | const FpControl control) { | ||
| 1493 | return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 1497 | const FpControl control) { | ||
| 1498 | return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 1502 | const FpControl control) { | ||
| 1503 | return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); | ||
| 1504 | } | ||
| 1505 | |||
| 1506 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | ||
| 1507 | return Inst<U1>(Opcode::LogicalOr, a, b); | ||
| 1508 | } | ||
| 1509 | |||
| 1510 | U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { | ||
| 1511 | return Inst<U1>(Opcode::LogicalAnd, a, b); | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | U1 IREmitter::LogicalXor(const U1& a, const U1& b) { | ||
| 1515 | return Inst<U1>(Opcode::LogicalXor, a, b); | ||
| 1516 | } | ||
| 1517 | |||
| 1518 | U1 IREmitter::LogicalNot(const U1& value) { | ||
| 1519 | return Inst<U1>(Opcode::LogicalNot, value); | ||
| 1520 | } | ||
| 1521 | |||
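| | // Float-to-integer conversions dispatch on destination bit size (16, 32 or | ||
| | // 64) and source float type; 16-bit results are held in a 32-bit value. | ||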
| 1522 | U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) { | ||
| 1523 | switch (bitsize) { | ||
| 1524 | case 16: | ||
| 1525 | switch (value.Type()) { | ||
| 1526 | case Type::F16: | ||
| 1527 | return Inst<U32>(Opcode::ConvertS16F16, value); | ||
| 1528 | case Type::F32: | ||
| 1529 | return Inst<U32>(Opcode::ConvertS16F32, value); | ||
| 1530 | case Type::F64: | ||
| 1531 | return Inst<U32>(Opcode::ConvertS16F64, value); | ||
| 1532 | default: | ||
| 1533 | ThrowInvalidType(value.Type()); | ||
| 1534 | } | ||
| 1535 | case 32: | ||
| 1536 | switch (value.Type()) { | ||
| 1537 | case Type::F16: | ||
| 1538 | return Inst<U32>(Opcode::ConvertS32F16, value); | ||
| 1539 | case Type::F32: | ||
| 1540 | return Inst<U32>(Opcode::ConvertS32F32, value); | ||
| 1541 | case Type::F64: | ||
| 1542 | return Inst<U32>(Opcode::ConvertS32F64, value); | ||
| 1543 | default: | ||
| 1544 | ThrowInvalidType(value.Type()); | ||
| 1545 | } | ||
| 1546 | case 64: | ||
| 1547 | switch (value.Type()) { | ||
| 1548 | case Type::F16: | ||
| 1549 | return Inst<U64>(Opcode::ConvertS64F16, value); | ||
| 1550 | case Type::F32: | ||
| 1551 | return Inst<U64>(Opcode::ConvertS64F32, value); | ||
| 1552 | case Type::F64: | ||
| 1553 | return Inst<U64>(Opcode::ConvertS64F64, value); | ||
| 1554 | default: | ||
| 1555 | ThrowInvalidType(value.Type()); | ||
| 1556 | } | ||
| 1557 | default: | ||
| 1558 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1559 | } | ||
| 1560 | } | ||
| 1561 | |||
| 1562 | U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) { | ||
| 1563 | switch (bitsize) { | ||
| 1564 | case 16: | ||
| 1565 | switch (value.Type()) { | ||
| 1566 | case Type::F16: | ||
| 1567 | return Inst<U32>(Opcode::ConvertU16F16, value); | ||
| 1568 | case Type::F32: | ||
| 1569 | return Inst<U32>(Opcode::ConvertU16F32, value); | ||
| 1570 | case Type::F64: | ||
| 1571 | return Inst<U32>(Opcode::ConvertU16F64, value); | ||
| 1572 | default: | ||
| 1573 | ThrowInvalidType(value.Type()); | ||
| 1574 | } | ||
| 1575 | case 32: | ||
| 1576 | switch (value.Type()) { | ||
| 1577 | case Type::F16: | ||
| 1578 | return Inst<U32>(Opcode::ConvertU32F16, value); | ||
| 1579 | case Type::F32: | ||
| 1580 | return Inst<U32>(Opcode::ConvertU32F32, value); | ||
| 1581 | case Type::F64: | ||
| 1582 | return Inst<U32>(Opcode::ConvertU32F64, value); | ||
| 1583 | default: | ||
| 1584 | ThrowInvalidType(value.Type()); | ||
| 1585 | } | ||
| 1586 | case 64: | ||
| 1587 | switch (value.Type()) { | ||
| 1588 | case Type::F16: | ||
| 1589 | return Inst<U64>(Opcode::ConvertU64F16, value); | ||
| 1590 | case Type::F32: | ||
| 1591 | return Inst<U64>(Opcode::ConvertU64F32, value); | ||
| 1592 | case Type::F64: | ||
| 1593 | return Inst<U64>(Opcode::ConvertU64F64, value); | ||
| 1594 | default: | ||
| 1595 | ThrowInvalidType(value.Type()); | ||
| 1596 | } | ||
| 1597 | default: | ||
| 1598 | throw InvalidArgument("Invalid destination bitsize {}", bitsize); | ||
| 1599 | } | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) { | ||
| 1603 | return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value); | ||
| 1604 | } | ||
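| | // Usage sketch (added commentary, not part of the original diff): the | ||
| | // dispatch helper above picks the signed or unsigned opcode family, e.g. | ||
| | // assuming an IREmitter `ir` and an F32 `value` are in scope: | ||
| | //   const IR::U32U64 s{ir.ConvertFToI(32, true, value)};  // ConvertS32F32 | ||
| | //   const IR::U32U64 u{ir.ConvertFToI(64, false, value)}; // ConvertU64F32 | ||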
| 1605 | |||
| 1606 | F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 1607 | FpControl control) { | ||
| 1608 | switch (dest_bitsize) { | ||
| 1609 | case 16: | ||
| 1610 | switch (src_bitsize) { | ||
| 1611 | case 8: | ||
| 1612 | return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value); | ||
| 1613 | case 16: | ||
| 1614 | return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value); | ||
| 1615 | case 32: | ||
| 1616 | return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value); | ||
| 1617 | case 64: | ||
| 1618 | return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value); | ||
| 1619 | } | ||
| 1620 | break; | ||
| 1621 | case 32: | ||
| 1622 | switch (src_bitsize) { | ||
| 1623 | case 8: | ||
| 1624 | return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value); | ||
| 1625 | case 16: | ||
| 1626 | return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value); | ||
| 1627 | case 32: | ||
| 1628 | return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value); | ||
| 1629 | case 64: | ||
| 1630 | return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value); | ||
| 1631 | } | ||
| 1632 | break; | ||
| 1633 | case 64: | ||
| 1634 | switch (src_bitsize) { | ||
| 1635 | case 8: | ||
| 1636 | return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value); | ||
| 1637 | case 16: | ||
| 1638 | return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value); | ||
| 1639 | case 32: | ||
| 1640 | return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value); | ||
| 1641 | case 64: | ||
| 1642 | return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value); | ||
| 1643 | } | ||
| 1644 | break; | ||
| 1645 | } | ||
| 1646 | throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); | ||
| 1647 | } | ||
| 1648 | |||
| 1649 | F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 1650 | FpControl control) { | ||
| 1651 | switch (dest_bitsize) { | ||
| 1652 | case 16: | ||
| 1653 | switch (src_bitsize) { | ||
| 1654 | case 8: | ||
| 1655 | return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value); | ||
| 1656 | case 16: | ||
| 1657 | return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value); | ||
| 1658 | case 32: | ||
| 1659 | return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value); | ||
| 1660 | case 64: | ||
| 1661 | return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value); | ||
| 1662 | } | ||
| 1663 | break; | ||
| 1664 | case 32: | ||
| 1665 | switch (src_bitsize) { | ||
| 1666 | case 8: | ||
| 1667 | return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value); | ||
| 1668 | case 16: | ||
| 1669 | return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value); | ||
| 1670 | case 32: | ||
| 1671 | return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value); | ||
| 1672 | case 64: | ||
| 1673 | return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value); | ||
| 1674 | } | ||
| 1675 | break; | ||
| 1676 | case 64: | ||
| 1677 | switch (src_bitsize) { | ||
| 1678 | case 8: | ||
| 1679 | return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value); | ||
| 1680 | case 16: | ||
| 1681 | return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value); | ||
| 1682 | case 32: | ||
| 1683 | return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value); | ||
| 1684 | case 64: | ||
| 1685 | return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value); | ||
| 1686 | } | ||
| 1687 | break; | ||
| 1688 | } | ||
| 1689 | throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize); | ||
| 1690 | } | ||
| 1691 | |||
| 1692 | F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, | ||
| 1693 | const Value& value, FpControl control) { | ||
| 1694 | return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control) | ||
| 1695 | : ConvertUToF(dest_bitsize, src_bitsize, value, control); | ||
| 1696 | } | ||
| 1697 | |||
| 1698 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { | ||
| 1699 | switch (result_bitsize) { | ||
| 1700 | case 32: | ||
| 1701 | switch (value.Type()) { | ||
| 1702 | case Type::U32: | ||
| 1703 | // Nothing to do | ||
| 1704 | return value; | ||
| 1705 | case Type::U64: | ||
| 1706 | return Inst<U32>(Opcode::ConvertU32U64, value); | ||
| 1707 | default: | ||
| 1708 | break; | ||
| 1709 | } | ||
| 1710 | break; | ||
| 1711 | case 64: | ||
| 1712 | switch (value.Type()) { | ||
| 1713 | case Type::U32: | ||
| 1714 | return Inst<U64>(Opcode::ConvertU64U32, value); | ||
| 1715 | case Type::U64: | ||
| 1716 | // Nothing to do | ||
| 1717 | return value; | ||
| 1718 | default: | ||
| 1719 | break; | ||
| 1720 | } | ||
| 1721 | } | ||
| 1722 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||
| 1723 | } | ||
| 1724 | |||
| 1725 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) { | ||
| 1726 | switch (result_bitsize) { | ||
| 1727 | case 16: | ||
| 1728 | switch (value.Type()) { | ||
| 1729 | case Type::F16: | ||
| 1730 | // Nothing to do | ||
| 1731 | return value; | ||
| 1732 | case Type::F32: | ||
| 1733 | return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value); | ||
| 1734 | case Type::F64: | ||
| 1735 | throw LogicError("Illegal conversion from F64 to F16"); | ||
| 1736 | default: | ||
| 1737 | break; | ||
| 1738 | } | ||
| 1739 | break; | ||
| 1740 | case 32: | ||
| 1741 | switch (value.Type()) { | ||
| 1742 | case Type::F16: | ||
| 1743 | return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value); | ||
| 1744 | case Type::F32: | ||
| 1745 | // Nothing to do | ||
| 1746 | return value; | ||
| 1747 | case Type::F64: | ||
| 1748 | return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value); | ||
| 1749 | default: | ||
| 1750 | break; | ||
| 1751 | } | ||
| 1752 | break; | ||
| 1753 | case 64: | ||
| 1754 | switch (value.Type()) { | ||
| 1755 | case Type::F16: | ||
| 1756 | throw LogicError("Illegal conversion from F16 to F64"); | ||
| 1757 | case Type::F32: | ||
| 1758 | return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value); | ||
| 1759 | case Type::F64: | ||
| 1760 | // Nothing to do | ||
| 1761 | return value; | ||
| 1762 | default: | ||
| 1763 | break; | ||
| 1764 | } | ||
| 1765 | break; | ||
| 1766 | } | ||
| 1767 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||
| 1768 | } | ||
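| | // Note (added commentary, not part of the original diff): UConvert and | ||
| | // FPConvert only change bit width; a same-width request returns the value | ||
| | // unchanged, and the F16<->F64 paths throw because no opcode exists for | ||
| | // them. For example, ir.FPConvert(32, f16_value) emits ConvertF32F16. | ||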
| 1769 | |||
| 1770 | Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias, | ||
| 1771 | const Value& offset, const F32& lod_clamp, | ||
| 1772 | TextureInstInfo info) { | ||
| 1773 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1774 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod | ||
| 1775 | : Opcode::BindlessImageSampleImplicitLod}; | ||
| 1776 | return Inst(op, Flags{info}, handle, coords, bias_lc, offset); | ||
| 1777 | } | ||
| 1778 | |||
| 1779 | Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod, | ||
| 1780 | const Value& offset, TextureInstInfo info) { | ||
| 1781 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod | ||
| 1782 | : Opcode::BindlessImageSampleExplicitLod}; | ||
| 1783 | return Inst(op, Flags{info}, handle, coords, lod, offset); | ||
| 1784 | } | ||
| 1785 | |||
| 1786 | F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1787 | const F32& bias, const Value& offset, | ||
| 1788 | const F32& lod_clamp, TextureInstInfo info) { | ||
| 1789 | const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)}; | ||
| 1790 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod | ||
| 1791 | : Opcode::BindlessImageSampleDrefImplicitLod}; | ||
| 1792 | return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset); | ||
| 1793 | } | ||
| 1794 | |||
| 1795 | F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref, | ||
| 1796 | const F32& lod, const Value& offset, | ||
| 1797 | TextureInstInfo info) { | ||
| 1798 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod | ||
| 1799 | : Opcode::BindlessImageSampleDrefExplicitLod}; | ||
| 1800 | return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset); | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 1804 | const Value& offset2, TextureInstInfo info) { | ||
| 1805 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather}; | ||
| 1806 | return Inst(op, Flags{info}, handle, coords, offset, offset2); | ||
| 1807 | } | ||
| 1808 | |||
| 1809 | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | ||
| 1810 | const Value& offset2, const F32& dref, TextureInstInfo info) { | ||
| 1811 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref | ||
| 1812 | : Opcode::BindlessImageGatherDref}; | ||
| 1813 | return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); | ||
| 1814 | } | ||
| 1815 | |||
| 1816 | Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset, | ||
| 1817 | const U32& lod, const U32& multisampling, TextureInstInfo info) { | ||
| 1818 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch}; | ||
| 1819 | return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling); | ||
| 1820 | } | ||
| 1821 | |||
| 1822 | Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) { | ||
| 1823 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions | ||
| 1824 | : Opcode::BindlessImageQueryDimensions}; | ||
| 1825 | return Inst(op, handle, lod); | ||
| 1826 | } | ||
| 1827 | |||
| 1828 | Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) { | ||
| 1829 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod | ||
| 1830 | : Opcode::BindlessImageQueryLod}; | ||
| 1831 | return Inst(op, Flags{info}, handle, coords); | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, | ||
| 1835 | const Value& offset, const F32& lod_clamp, TextureInstInfo info) { | ||
| 1836 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient | ||
| 1837 | : Opcode::BindlessImageGradient}; | ||
| 1838 | return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { | ||
| 1842 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead}; | ||
| 1843 | return Inst(op, Flags{info}, handle, coords); | ||
| 1844 | } | ||
| 1845 | |||
| 1846 | void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, | ||
| 1847 | TextureInstInfo info) { | ||
| 1848 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; | ||
| 1849 | Inst(op, Flags{info}, handle, coords, color); | ||
| 1850 | } | ||
| 1851 | |||
| 1852 | Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value, | ||
| 1853 | TextureInstInfo info) { | ||
| 1854 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32 | ||
| 1855 | : Opcode::BindlessImageAtomicIAdd32}; | ||
| 1856 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1857 | } | ||
| 1858 | |||
| 1859 | Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1860 | TextureInstInfo info) { | ||
| 1861 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32 | ||
| 1862 | : Opcode::BindlessImageAtomicSMin32}; | ||
| 1863 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1864 | } | ||
| 1865 | |||
| 1866 | Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1867 | TextureInstInfo info) { | ||
| 1868 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32 | ||
| 1869 | : Opcode::BindlessImageAtomicUMin32}; | ||
| 1870 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1871 | } | ||
| 1872 | |||
| 1873 | Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value, | ||
| 1874 | bool is_signed, TextureInstInfo info) { | ||
| 1875 | return is_signed ? ImageAtomicSMin(handle, coords, value, info) | ||
| 1876 | : ImageAtomicUMin(handle, coords, value, info); | ||
| 1877 | } | ||
| 1878 | |||
| 1879 | Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1880 | TextureInstInfo info) { | ||
| 1881 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32 | ||
| 1882 | : Opcode::BindlessImageAtomicSMax32}; | ||
| 1883 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1887 | TextureInstInfo info) { | ||
| 1888 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32 | ||
| 1889 | : Opcode::BindlessImageAtomicUMax32}; | ||
| 1890 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1891 | } | ||
| 1892 | |||
| 1893 | Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value, | ||
| 1894 | bool is_signed, TextureInstInfo info) { | ||
| 1895 | return is_signed ? ImageAtomicSMax(handle, coords, value, info) | ||
| 1896 | : ImageAtomicUMax(handle, coords, value, info); | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, | ||
| 1900 | TextureInstInfo info) { | ||
| 1901 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32 | ||
| 1902 | : Opcode::BindlessImageAtomicInc32}; | ||
| 1903 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, | ||
| 1907 | TextureInstInfo info) { | ||
| 1908 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32 | ||
| 1909 | : Opcode::BindlessImageAtomicDec32}; | ||
| 1910 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, | ||
| 1914 | TextureInstInfo info) { | ||
| 1915 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32 | ||
| 1916 | : Opcode::BindlessImageAtomicAnd32}; | ||
| 1917 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1918 | } | ||
| 1919 | |||
| 1920 | Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, | ||
| 1921 | TextureInstInfo info) { | ||
| 1922 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32 | ||
| 1923 | : Opcode::BindlessImageAtomicOr32}; | ||
| 1924 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, | ||
| 1928 | TextureInstInfo info) { | ||
| 1929 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32 | ||
| 1930 | : Opcode::BindlessImageAtomicXor32}; | ||
| 1931 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value, | ||
| 1935 | TextureInstInfo info) { | ||
| 1936 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32 | ||
| 1937 | : Opcode::BindlessImageAtomicExchange32}; | ||
| 1938 | return Inst(op, Flags{info}, handle, coords, value); | ||
| 1939 | } | ||
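| | // Pattern note (added commentary, not part of the original diff): each | ||
| | // image helper above selects a Bound* opcode when the handle is an | ||
| | // immediate constant-buffer value and a Bindless* opcode otherwise; a | ||
| | // later texture-descriptor pass is expected to lower both forms into the | ||
| | // concrete Image* opcodes. | ||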
| 1940 | |||
| 1941 | U1 IREmitter::VoteAll(const U1& value) { | ||
| 1942 | return Inst<U1>(Opcode::VoteAll, value); | ||
| 1943 | } | ||
| 1944 | |||
| 1945 | U1 IREmitter::VoteAny(const U1& value) { | ||
| 1946 | return Inst<U1>(Opcode::VoteAny, value); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | U1 IREmitter::VoteEqual(const U1& value) { | ||
| 1950 | return Inst<U1>(Opcode::VoteEqual, value); | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | U32 IREmitter::SubgroupBallot(const U1& value) { | ||
| 1954 | return Inst<U32>(Opcode::SubgroupBallot, value); | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | U32 IREmitter::SubgroupEqMask() { | ||
| 1958 | return Inst<U32>(Opcode::SubgroupEqMask); | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | U32 IREmitter::SubgroupLtMask() { | ||
| 1962 | return Inst<U32>(Opcode::SubgroupLtMask); | ||
| 1963 | } | ||
| 1964 | |||
| 1965 | U32 IREmitter::SubgroupLeMask() { | ||
| 1966 | return Inst<U32>(Opcode::SubgroupLeMask); | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | U32 IREmitter::SubgroupGtMask() { | ||
| 1970 | return Inst<U32>(Opcode::SubgroupGtMask); | ||
| 1971 | } | ||
| 1972 | |||
| 1973 | U32 IREmitter::SubgroupGeMask() { | ||
| 1974 | return Inst<U32>(Opcode::SubgroupGeMask); | ||
| 1975 | } | ||
| 1976 | |||
| 1977 | U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1978 | const IR::U32& seg_mask) { | ||
| 1979 | return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||
| 1980 | } | ||
| 1981 | |||
| 1982 | U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1983 | const IR::U32& seg_mask) { | ||
| 1984 | return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||
| 1985 | } | ||
| 1986 | |||
| 1987 | U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1988 | const IR::U32& seg_mask) { | ||
| 1989 | return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||
| 1990 | } | ||
| 1991 | |||
| 1992 | U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1993 | const IR::U32& seg_mask) { | ||
| 1994 | return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||
| 1995 | } | ||
| 1996 | |||
| 1997 | F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) { | ||
| 1998 | return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); | ||
| 1999 | } | ||
| 2000 | |||
| 2001 | F32 IREmitter::DPdxFine(const F32& a) { | ||
| 2002 | return Inst<F32>(Opcode::DPdxFine, a); | ||
| 2003 | } | ||
| 2004 | |||
| 2005 | F32 IREmitter::DPdyFine(const F32& a) { | ||
| 2006 | return Inst<F32>(Opcode::DPdyFine, a); | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | F32 IREmitter::DPdxCoarse(const F32& a) { | ||
| 2010 | return Inst<F32>(Opcode::DPdxCoarse, a); | ||
| 2011 | } | ||
| 2012 | |||
| 2013 | F32 IREmitter::DPdyCoarse(const F32& a) { | ||
| 2014 | return Inst<F32>(Opcode::DPdyCoarse, a); | ||
| 2015 | } | ||
| 2016 | |||
| 2017 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h new file mode 100644 index 000000000..53f7b3b06 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -0,0 +1,413 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstring> | ||
| 8 | #include <type_traits> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | class IREmitter { | ||
| 18 | public: | ||
| 19 | explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {} | ||
| 20 | explicit IREmitter(Block& block_, Block::iterator insertion_point_) | ||
| 21 | : block{&block_}, insertion_point{insertion_point_} {} | ||
| 22 | |||
| 23 | Block* block; | ||
| 24 | |||
| 25 | [[nodiscard]] U1 Imm1(bool value) const; | ||
| 26 | [[nodiscard]] U8 Imm8(u8 value) const; | ||
| 27 | [[nodiscard]] U16 Imm16(u16 value) const; | ||
| 28 | [[nodiscard]] U32 Imm32(u32 value) const; | ||
| 29 | [[nodiscard]] U32 Imm32(s32 value) const; | ||
| 30 | [[nodiscard]] F32 Imm32(f32 value) const; | ||
| 31 | [[nodiscard]] U64 Imm64(u64 value) const; | ||
| 32 | [[nodiscard]] U64 Imm64(s64 value) const; | ||
| 33 | [[nodiscard]] F64 Imm64(f64 value) const; | ||
| 34 | |||
| 35 | U1 ConditionRef(const U1& value); | ||
| 36 | void Reference(const Value& value); | ||
| 37 | |||
| 38 | void PhiMove(IR::Inst& phi, const Value& value); | ||
| 39 | |||
| 40 | void Prologue(); | ||
| 41 | void Epilogue(); | ||
| 42 | void DemoteToHelperInvocation(); | ||
| 43 | void EmitVertex(const U32& stream); | ||
| 44 | void EndPrimitive(const U32& stream); | ||
| 45 | |||
| 46 | [[nodiscard]] U32 GetReg(IR::Reg reg); | ||
| 47 | void SetReg(IR::Reg reg, const U32& value); | ||
| 48 | |||
| 49 | [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false); | ||
| 50 | void SetPred(IR::Pred pred, const U1& value); | ||
| 51 | |||
| 52 | [[nodiscard]] U1 GetGotoVariable(u32 id); | ||
| 53 | void SetGotoVariable(u32 id, const U1& value); | ||
| 54 | |||
| 55 | [[nodiscard]] U32 GetIndirectBranchVariable(); | ||
| 56 | void SetIndirectBranchVariable(const U32& value); | ||
| 57 | |||
| 58 | [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset); | ||
| 59 | [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize, | ||
| 60 | bool is_signed); | ||
| 61 | [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset); | ||
| 62 | |||
| 63 | [[nodiscard]] U1 GetZFlag(); | ||
| 64 | [[nodiscard]] U1 GetSFlag(); | ||
| 65 | [[nodiscard]] U1 GetCFlag(); | ||
| 66 | [[nodiscard]] U1 GetOFlag(); | ||
| 67 | |||
| 68 | void SetZFlag(const U1& value); | ||
| 69 | void SetSFlag(const U1& value); | ||
| 70 | void SetCFlag(const U1& value); | ||
| 71 | void SetOFlag(const U1& value); | ||
| 72 | |||
| 73 | [[nodiscard]] U1 Condition(IR::Condition cond); | ||
| 74 | [[nodiscard]] U1 GetFlowTestResult(FlowTest test); | ||
| 75 | |||
| 76 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | ||
| 77 | [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | ||
| 78 | void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | ||
| 79 | |||
| 80 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | ||
| 81 | [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); | ||
| 82 | void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); | ||
| 83 | |||
| 84 | [[nodiscard]] F32 GetPatch(Patch patch); | ||
| 85 | void SetPatch(Patch patch, const F32& value); | ||
| 86 | |||
| 87 | void SetFragColor(u32 index, u32 component, const F32& value); | ||
| 88 | void SetSampleMask(const U32& value); | ||
| 89 | void SetFragDepth(const F32& value); | ||
| 90 | |||
| 91 | [[nodiscard]] U32 WorkgroupIdX(); | ||
| 92 | [[nodiscard]] U32 WorkgroupIdY(); | ||
| 93 | [[nodiscard]] U32 WorkgroupIdZ(); | ||
| 94 | |||
| 95 | [[nodiscard]] Value LocalInvocationId(); | ||
| 96 | [[nodiscard]] U32 LocalInvocationIdX(); | ||
| 97 | [[nodiscard]] U32 LocalInvocationIdY(); | ||
| 98 | [[nodiscard]] U32 LocalInvocationIdZ(); | ||
| 99 | |||
| 100 | [[nodiscard]] U32 InvocationId(); | ||
| 101 | [[nodiscard]] U32 SampleId(); | ||
| 102 | [[nodiscard]] U1 IsHelperInvocation(); | ||
| 103 | [[nodiscard]] F32 YDirection(); | ||
| 104 | |||
| 105 | [[nodiscard]] U32 LaneId(); | ||
| 106 | |||
| 107 | [[nodiscard]] U32 LoadGlobalU8(const U64& address); | ||
| 108 | [[nodiscard]] U32 LoadGlobalS8(const U64& address); | ||
| 109 | [[nodiscard]] U32 LoadGlobalU16(const U64& address); | ||
| 110 | [[nodiscard]] U32 LoadGlobalS16(const U64& address); | ||
| 111 | [[nodiscard]] U32 LoadGlobal32(const U64& address); | ||
| 112 | [[nodiscard]] Value LoadGlobal64(const U64& address); | ||
| 113 | [[nodiscard]] Value LoadGlobal128(const U64& address); | ||
| 114 | |||
| 115 | void WriteGlobalU8(const U64& address, const U32& value); | ||
| 116 | void WriteGlobalS8(const U64& address, const U32& value); | ||
| 117 | void WriteGlobalU16(const U64& address, const U32& value); | ||
| 118 | void WriteGlobalS16(const U64& address, const U32& value); | ||
| 119 | void WriteGlobal32(const U64& address, const U32& value); | ||
| 120 | void WriteGlobal64(const U64& address, const IR::Value& vector); | ||
| 121 | void WriteGlobal128(const U64& address, const IR::Value& vector); | ||
| 122 | |||
| 123 | [[nodiscard]] U32 LoadLocal(const U32& word_offset); | ||
| 124 | void WriteLocal(const U32& word_offset, const U32& value); | ||
| 125 | |||
| 126 | [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset); | ||
| 127 | void WriteShared(int bit_size, const U32& offset, const Value& value); | ||
| 128 | |||
| 129 | [[nodiscard]] U1 GetZeroFromOp(const Value& op); | ||
| 130 | [[nodiscard]] U1 GetSignFromOp(const Value& op); | ||
| 131 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||
| 132 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||
| 133 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||
| 134 | [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||
| 135 | |||
| 136 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||
| 137 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||
| 138 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | ||
| 139 | const Value& e4); | ||
| 140 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); | ||
| 141 | [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); | ||
| 142 | |||
| 143 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, | ||
| 144 | const Value& false_value); | ||
| 145 | |||
| 146 | void Barrier(); | ||
| 147 | void WorkgroupMemoryBarrier(); | ||
| 148 | void DeviceMemoryBarrier(); | ||
| 149 | |||
| 150 | template <typename Dest, typename Source> | ||
| 151 | [[nodiscard]] Dest BitCast(const Source& value); | ||
| 152 | |||
| 153 | [[nodiscard]] U64 PackUint2x32(const Value& vector); | ||
| 154 | [[nodiscard]] Value UnpackUint2x32(const U64& value); | ||
| 155 | |||
| 156 | [[nodiscard]] U32 PackFloat2x16(const Value& vector); | ||
| 157 | [[nodiscard]] Value UnpackFloat2x16(const U32& value); | ||
| 158 | |||
| 159 | [[nodiscard]] U32 PackHalf2x16(const Value& vector); | ||
| 160 | [[nodiscard]] Value UnpackHalf2x16(const U32& value); | ||
| 161 | |||
| 162 | [[nodiscard]] F64 PackDouble2x32(const Value& vector); | ||
| 163 | [[nodiscard]] Value UnpackDouble2x32(const F64& value); | ||
| 164 | |||
| 165 | [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); | ||
| 166 | [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {}); | ||
| 167 | [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c, | ||
| 168 | FpControl control = {}); | ||
| 169 | |||
| 170 | [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value); | ||
| 171 | [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); | ||
| 172 | [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); | ||
| 173 | |||
| 174 | [[nodiscard]] F32 FPCos(const F32& value); | ||
| 175 | [[nodiscard]] F32 FPSin(const F32& value); | ||
| 176 | [[nodiscard]] F32 FPExp2(const F32& value); | ||
| 177 | [[nodiscard]] F32 FPLog2(const F32& value); | ||
| 178 | [[nodiscard]] F32F64 FPRecip(const F32F64& value); | ||
| 179 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | ||
| 180 | [[nodiscard]] F32 FPSqrt(const F32& value); | ||
| 181 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||
| 182 | [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||
| 183 | const F16F32F64& max_value); | ||
| 184 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||
| 185 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||
| 186 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||
| 187 | [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); | ||
| 188 | |||
| 189 | [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 190 | bool ordered = true); | ||
| 191 | [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 192 | bool ordered = true); | ||
| 193 | [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {}, | ||
| 194 | bool ordered = true); | ||
| 195 | [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 196 | FpControl control = {}, bool ordered = true); | ||
| 197 | [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 198 | FpControl control = {}, bool ordered = true); | ||
| 199 | [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, | ||
| 200 | FpControl control = {}, bool ordered = true); | ||
| 201 | [[nodiscard]] U1 FPIsNan(const F16F32F64& value); | ||
| 202 | [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs); | ||
| 203 | [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs); | ||
| 204 | [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | ||
| 205 | [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {}); | ||
| 206 | |||
| 207 | [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | ||
| 208 | [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); | ||
| 209 | [[nodiscard]] U32 IMul(const U32& a, const U32& b); | ||
| 210 | [[nodiscard]] U32U64 INeg(const U32U64& value); | ||
| 211 | [[nodiscard]] U32 IAbs(const U32& value); | ||
| 212 | [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift); | ||
| 213 | [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift); | ||
| 214 | [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift); | ||
| 215 | [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); | ||
| 216 | [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); | ||
| 217 | [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); | ||
| 218 | [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||
| 219 | const U32& count); | ||
| 220 | [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||
| 221 | bool is_signed = false); | ||
| 222 | [[nodiscard]] U32 BitReverse(const U32& value); | ||
| 223 | [[nodiscard]] U32 BitCount(const U32& value); | ||
| 224 | [[nodiscard]] U32 BitwiseNot(const U32& value); | ||
| 225 | |||
| 226 | [[nodiscard]] U32 FindSMsb(const U32& value); | ||
| 227 | [[nodiscard]] U32 FindUMsb(const U32& value); | ||
| 228 | [[nodiscard]] U32 SMin(const U32& a, const U32& b); | ||
| 229 | [[nodiscard]] U32 UMin(const U32& a, const U32& b); | ||
| 230 | [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed); | ||
| 231 | [[nodiscard]] U32 SMax(const U32& a, const U32& b); | ||
| 232 | [[nodiscard]] U32 UMax(const U32& a, const U32& b); | ||
| 233 | [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed); | ||
| 234 | [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); | ||
| 235 | [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); | ||
| 236 | |||
| 237 | [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 238 | [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); | ||
| 239 | [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 240 | [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 241 | [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | ||
| 242 | [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||
| 243 | |||
| 244 | [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); | ||
| 245 | [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); | ||
| 246 | [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); | ||
| 247 | [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 248 | [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); | ||
| 249 | [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); | ||
| 250 | [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 251 | [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); | ||
| 252 | [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); | ||
| 253 | [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); | ||
| 254 | [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); | ||
| 255 | [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); | ||
| 256 | [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); | ||
| 257 | |||
| 258 | [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); | ||
| 259 | [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); | ||
| 260 | [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); | ||
| 261 | [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, | ||
| 262 | bool is_signed); | ||
| 263 | [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); | ||
| 264 | [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); | ||
| 265 | [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, | ||
| 266 | bool is_signed); | ||
| 267 | [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); | ||
| 268 | [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); | ||
| 269 | [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); | ||
| 270 | [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); | ||
| 271 | [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); | ||
| 272 | [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); | ||
| 273 | |||
| 274 | [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 275 | const FpControl control = {}); | ||
| 276 | [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 277 | const FpControl control = {}); | ||
| 278 | [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 279 | const FpControl control = {}); | ||
| 280 | [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 281 | const FpControl control = {}); | ||
| 282 | |||
| 283 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | ||
| 284 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | ||
| 285 | [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | ||
| 286 | [[nodiscard]] U1 LogicalNot(const U1& value); | ||
| 287 | |||
| 288 | [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value); | ||
| 289 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | ||
| 290 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | ||
| 291 | [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 292 | FpControl control = {}); | ||
| 293 | [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value, | ||
| 294 | FpControl control = {}); | ||
| 295 | [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed, | ||
| 296 | const Value& value, FpControl control = {}); | ||
| 297 | |||
| 298 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); | ||
| 299 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value, | ||
| 300 | FpControl control = {}); | ||
| 301 | |||
| 302 | [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords, | ||
| 303 | const F32& bias, const Value& offset, | ||
| 304 | const F32& lod_clamp, TextureInstInfo info); | ||
| 305 | [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords, | ||
| 306 | const F32& lod, const Value& offset, | ||
| 307 | TextureInstInfo info); | ||
| 308 | [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, | ||
| 309 | const F32& dref, const F32& bias, | ||
| 310 | const Value& offset, const F32& lod_clamp, | ||
| 311 | TextureInstInfo info); | ||
| 312 | [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, | ||
| 313 | const F32& dref, const F32& lod, | ||
| 314 | const Value& offset, TextureInstInfo info); | ||
| 315 | [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod); | ||
| 316 | |||
| 317 | [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords, | ||
| 318 | TextureInstInfo info); | ||
| 319 | [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset, | ||
| 320 | const Value& offset2, TextureInstInfo info); | ||
| 321 | [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords, | ||
| 322 | const Value& offset, const Value& offset2, const F32& dref, | ||
| 323 | TextureInstInfo info); | ||
| 324 | [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, | ||
| 325 | const U32& lod, const U32& multisampling, TextureInstInfo info); | ||
| 326 | [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, | ||
| 327 | const Value& derivates, const Value& offset, | ||
| 328 | const F32& lod_clamp, TextureInstInfo info); | ||
| 329 | [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); | ||
| 330 | void ImageWrite(const Value& handle, const Value& coords, const Value& color, | ||
| 331 | TextureInstInfo info); | ||
| 332 | |||
| 333 | [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords, | ||
| 334 | const Value& value, TextureInstInfo info); | ||
| 335 | [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords, | ||
| 336 | const Value& value, TextureInstInfo info); | ||
| 337 | [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords, | ||
| 338 | const Value& value, TextureInstInfo info); | ||
| 339 | [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords, | ||
| 340 | const Value& value, bool is_signed, TextureInstInfo info); | ||
| 341 | [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords, | ||
| 342 | const Value& value, TextureInstInfo info); | ||
| 343 | [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords, | ||
| 344 | const Value& value, TextureInstInfo info); | ||
| 345 | [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords, | ||
| 346 | const Value& value, bool is_signed, TextureInstInfo info); | ||
| 347 | [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value, | ||
| 348 | TextureInstInfo info); | ||
| 349 | [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value, | ||
| 350 | TextureInstInfo info); | ||
| 351 | [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value, | ||
| 352 | TextureInstInfo info); | ||
| 353 | [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value, | ||
| 354 | TextureInstInfo info); | ||
| 355 | [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value, | ||
| 356 | TextureInstInfo info); | ||
| 357 | [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords, | ||
| 358 | const Value& value, TextureInstInfo info); | ||
| 359 | [[nodiscard]] U1 VoteAll(const U1& value); | ||
| 360 | [[nodiscard]] U1 VoteAny(const U1& value); | ||
| 361 | [[nodiscard]] U1 VoteEqual(const U1& value); | ||
| 362 | [[nodiscard]] U32 SubgroupBallot(const U1& value); | ||
| 363 | [[nodiscard]] U32 SubgroupEqMask(); | ||
| 364 | [[nodiscard]] U32 SubgroupLtMask(); | ||
| 365 | [[nodiscard]] U32 SubgroupLeMask(); | ||
| 366 | [[nodiscard]] U32 SubgroupGtMask(); | ||
| 367 | [[nodiscard]] U32 SubgroupGeMask(); | ||
| 368 | [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 369 | const IR::U32& seg_mask); | ||
| 370 | [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 371 | const IR::U32& seg_mask); | ||
| 372 | [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 373 | const IR::U32& seg_mask); | ||
| 374 | [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||
| 375 | const IR::U32& clamp, const IR::U32& seg_mask); | ||
| 376 | [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, | ||
| 377 | FpControl control = {}); | ||
| 378 | |||
| 379 | [[nodiscard]] F32 DPdxFine(const F32& a); | ||
| 380 | |||
| 381 | [[nodiscard]] F32 DPdyFine(const F32& a); | ||
| 382 | |||
| 383 | [[nodiscard]] F32 DPdxCoarse(const F32& a); | ||
| 384 | |||
| 385 | [[nodiscard]] F32 DPdyCoarse(const F32& a); | ||
| 386 | |||
| 387 | private: | ||
| 388 | IR::Block::iterator insertion_point; | ||
| 389 | |||
| 390 | template <typename T = Value, typename... Args> | ||
| 391 | T Inst(Opcode op, Args... args) { | ||
| 392 | auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})}; | ||
| 393 | return T{Value{&*it}}; | ||
| 394 | } | ||
| 395 | |||
| 396 | template <typename T> | ||
| 397 | requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags { | ||
| 398 | Flags() = default; | ||
| 399 | Flags(T proxy_) : proxy{proxy_} {} | ||
| 400 | |||
| 401 | T proxy; | ||
| 402 | }; | ||
| 403 | |||
| 404 | template <typename T = Value, typename FlagType, typename... Args> | ||
| 405 | T Inst(Opcode op, Flags<FlagType> flags, Args... args) { | ||
| 406 | u32 raw_flags{}; | ||
| 407 | std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); | ||
| 408 | auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; | ||
| 409 | return T{Value{&*it}}; | ||
| 410 | } | ||
| 411 | }; | ||
| 412 | |||
| 413 | } // namespace Shader::IR | ||
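| | // Usage sketch (added commentary, not part of the original diff): appending | ||
| | // instructions to a block through the emitter, assuming a Block `block`: | ||
| | //   IR::IREmitter ir{block}; | ||
| | //   const IR::U32 base{ir.GetReg(IR::Reg::R0)}; | ||
| | //   ir.SetReg(IR::Reg::R2, ir.IAdd(base, ir.Imm32(4))); | ||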
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp new file mode 100644 index 000000000..3dfa5a880 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -0,0 +1,411 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | |||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | namespace { | ||
| 14 | void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) { | ||
| 15 | if (inst && inst->GetOpcode() != opcode) { | ||
| 16 | throw LogicError("Invalid pseudo-instruction"); | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 20 | void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) { | ||
| 21 | if (dest_inst) { | ||
| 22 | throw LogicError("Only one of each type of pseudo-op allowed"); | ||
| 23 | } | ||
| 24 | dest_inst = pseudo_inst; | ||
| 25 | } | ||
| 26 | |||
| 27 | void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) { | ||
| 28 | if (inst->GetOpcode() != expected_opcode) { | ||
| 29 | throw LogicError("Undoing use of invalid pseudo-op"); | ||
| 30 | } | ||
| 31 | inst = nullptr; | ||
| 32 | } | ||
| 33 | |||
| 34 | void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) { | ||
| 35 | if (!associated_insts) { | ||
| 36 | associated_insts = std::make_unique<AssociatedInsts>(); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | } // Anonymous namespace | ||
| 40 | |||
| 41 | Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} { | ||
| 42 | if (op == Opcode::Phi) { | ||
| 43 | std::construct_at(&phi_args); | ||
| 44 | } else { | ||
| 45 | std::construct_at(&args); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | Inst::~Inst() { | ||
| 50 | if (op == Opcode::Phi) { | ||
| 51 | std::destroy_at(&phi_args); | ||
| 52 | } else { | ||
| 53 | std::destroy_at(&args); | ||
| 54 | } | ||
| 55 | } | ||
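| | // Note (added commentary, not part of the original diff): args and phi_args | ||
| | // appear to share storage inside Inst, so the constructor and destructor | ||
| | // above start and end the lifetime of whichever member matches the opcode | ||
| | // by hand. | ||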
| 56 | |||
| 57 | bool Inst::MayHaveSideEffects() const noexcept { | ||
| 58 | switch (op) { | ||
| 59 | case Opcode::ConditionRef: | ||
| 60 | case Opcode::Reference: | ||
| 61 | case Opcode::PhiMove: | ||
| 62 | case Opcode::Prologue: | ||
| 63 | case Opcode::Epilogue: | ||
| 64 | case Opcode::Join: | ||
| 65 | case Opcode::DemoteToHelperInvocation: | ||
| 66 | case Opcode::Barrier: | ||
| 67 | case Opcode::WorkgroupMemoryBarrier: | ||
| 68 | case Opcode::DeviceMemoryBarrier: | ||
| 69 | case Opcode::EmitVertex: | ||
| 70 | case Opcode::EndPrimitive: | ||
| 71 | case Opcode::SetAttribute: | ||
| 72 | case Opcode::SetAttributeIndexed: | ||
| 73 | case Opcode::SetPatch: | ||
| 74 | case Opcode::SetFragColor: | ||
| 75 | case Opcode::SetSampleMask: | ||
| 76 | case Opcode::SetFragDepth: | ||
| 77 | case Opcode::WriteGlobalU8: | ||
| 78 | case Opcode::WriteGlobalS8: | ||
| 79 | case Opcode::WriteGlobalU16: | ||
| 80 | case Opcode::WriteGlobalS16: | ||
| 81 | case Opcode::WriteGlobal32: | ||
| 82 | case Opcode::WriteGlobal64: | ||
| 83 | case Opcode::WriteGlobal128: | ||
| 84 | case Opcode::WriteStorageU8: | ||
| 85 | case Opcode::WriteStorageS8: | ||
| 86 | case Opcode::WriteStorageU16: | ||
| 87 | case Opcode::WriteStorageS16: | ||
| 88 | case Opcode::WriteStorage32: | ||
| 89 | case Opcode::WriteStorage64: | ||
| 90 | case Opcode::WriteStorage128: | ||
| 91 | case Opcode::WriteLocal: | ||
| 92 | case Opcode::WriteSharedU8: | ||
| 93 | case Opcode::WriteSharedU16: | ||
| 94 | case Opcode::WriteSharedU32: | ||
| 95 | case Opcode::WriteSharedU64: | ||
| 96 | case Opcode::WriteSharedU128: | ||
| 97 | case Opcode::SharedAtomicIAdd32: | ||
| 98 | case Opcode::SharedAtomicSMin32: | ||
| 99 | case Opcode::SharedAtomicUMin32: | ||
| 100 | case Opcode::SharedAtomicSMax32: | ||
| 101 | case Opcode::SharedAtomicUMax32: | ||
| 102 | case Opcode::SharedAtomicInc32: | ||
| 103 | case Opcode::SharedAtomicDec32: | ||
| 104 | case Opcode::SharedAtomicAnd32: | ||
| 105 | case Opcode::SharedAtomicOr32: | ||
| 106 | case Opcode::SharedAtomicXor32: | ||
| 107 | case Opcode::SharedAtomicExchange32: | ||
| 108 | case Opcode::SharedAtomicExchange64: | ||
| 109 | case Opcode::GlobalAtomicIAdd32: | ||
| 110 | case Opcode::GlobalAtomicSMin32: | ||
| 111 | case Opcode::GlobalAtomicUMin32: | ||
| 112 | case Opcode::GlobalAtomicSMax32: | ||
| 113 | case Opcode::GlobalAtomicUMax32: | ||
| 114 | case Opcode::GlobalAtomicInc32: | ||
| 115 | case Opcode::GlobalAtomicDec32: | ||
| 116 | case Opcode::GlobalAtomicAnd32: | ||
| 117 | case Opcode::GlobalAtomicOr32: | ||
| 118 | case Opcode::GlobalAtomicXor32: | ||
| 119 | case Opcode::GlobalAtomicExchange32: | ||
| 120 | case Opcode::GlobalAtomicIAdd64: | ||
| 121 | case Opcode::GlobalAtomicSMin64: | ||
| 122 | case Opcode::GlobalAtomicUMin64: | ||
| 123 | case Opcode::GlobalAtomicSMax64: | ||
| 124 | case Opcode::GlobalAtomicUMax64: | ||
| 125 | case Opcode::GlobalAtomicAnd64: | ||
| 126 | case Opcode::GlobalAtomicOr64: | ||
| 127 | case Opcode::GlobalAtomicXor64: | ||
| 128 | case Opcode::GlobalAtomicExchange64: | ||
| 129 | case Opcode::GlobalAtomicAddF32: | ||
| 130 | case Opcode::GlobalAtomicAddF16x2: | ||
| 131 | case Opcode::GlobalAtomicAddF32x2: | ||
| 132 | case Opcode::GlobalAtomicMinF16x2: | ||
| 133 | case Opcode::GlobalAtomicMinF32x2: | ||
| 134 | case Opcode::GlobalAtomicMaxF16x2: | ||
| 135 | case Opcode::GlobalAtomicMaxF32x2: | ||
| 136 | case Opcode::StorageAtomicIAdd32: | ||
| 137 | case Opcode::StorageAtomicSMin32: | ||
| 138 | case Opcode::StorageAtomicUMin32: | ||
| 139 | case Opcode::StorageAtomicSMax32: | ||
| 140 | case Opcode::StorageAtomicUMax32: | ||
| 141 | case Opcode::StorageAtomicInc32: | ||
| 142 | case Opcode::StorageAtomicDec32: | ||
| 143 | case Opcode::StorageAtomicAnd32: | ||
| 144 | case Opcode::StorageAtomicOr32: | ||
| 145 | case Opcode::StorageAtomicXor32: | ||
| 146 | case Opcode::StorageAtomicExchange32: | ||
| 147 | case Opcode::StorageAtomicIAdd64: | ||
| 148 | case Opcode::StorageAtomicSMin64: | ||
| 149 | case Opcode::StorageAtomicUMin64: | ||
| 150 | case Opcode::StorageAtomicSMax64: | ||
| 151 | case Opcode::StorageAtomicUMax64: | ||
| 152 | case Opcode::StorageAtomicAnd64: | ||
| 153 | case Opcode::StorageAtomicOr64: | ||
| 154 | case Opcode::StorageAtomicXor64: | ||
| 155 | case Opcode::StorageAtomicExchange64: | ||
| 156 | case Opcode::StorageAtomicAddF32: | ||
| 157 | case Opcode::StorageAtomicAddF16x2: | ||
| 158 | case Opcode::StorageAtomicAddF32x2: | ||
| 159 | case Opcode::StorageAtomicMinF16x2: | ||
| 160 | case Opcode::StorageAtomicMinF32x2: | ||
| 161 | case Opcode::StorageAtomicMaxF16x2: | ||
| 162 | case Opcode::StorageAtomicMaxF32x2: | ||
| 163 | case Opcode::BindlessImageWrite: | ||
| 164 | case Opcode::BoundImageWrite: | ||
| 165 | case Opcode::ImageWrite: | ||
| 166 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 167 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 168 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 169 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 170 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 171 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 172 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 173 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 174 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 175 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 176 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 177 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 178 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 179 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 180 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 181 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 182 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 183 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 184 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 185 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 186 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 187 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 188 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 189 | case IR::Opcode::ImageAtomicSMin32: | ||
| 190 | case IR::Opcode::ImageAtomicUMin32: | ||
| 191 | case IR::Opcode::ImageAtomicSMax32: | ||
| 192 | case IR::Opcode::ImageAtomicUMax32: | ||
| 193 | case IR::Opcode::ImageAtomicInc32: | ||
| 194 | case IR::Opcode::ImageAtomicDec32: | ||
| 195 | case IR::Opcode::ImageAtomicAnd32: | ||
| 196 | case IR::Opcode::ImageAtomicOr32: | ||
| 197 | case IR::Opcode::ImageAtomicXor32: | ||
| 198 | case IR::Opcode::ImageAtomicExchange32: | ||
| 199 | return true; | ||
| 200 | default: | ||
| 201 | return false; | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | bool Inst::IsPseudoInstruction() const noexcept { | ||
| 206 | switch (op) { | ||
| 207 | case Opcode::GetZeroFromOp: | ||
| 208 | case Opcode::GetSignFromOp: | ||
| 209 | case Opcode::GetCarryFromOp: | ||
| 210 | case Opcode::GetOverflowFromOp: | ||
| 211 | case Opcode::GetSparseFromOp: | ||
| 212 | case Opcode::GetInBoundsFromOp: | ||
| 213 | return true; | ||
| 214 | default: | ||
| 215 | return false; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | bool Inst::AreAllArgsImmediates() const { | ||
| 220 | if (op == Opcode::Phi) { | ||
| 221 | throw LogicError("Testing for all arguments are immediates on phi instruction"); | ||
| 222 | } | ||
| 223 | return std::all_of(args.begin(), args.begin() + NumArgs(), | ||
| 224 | [](const IR::Value& value) { return value.IsImmediate(); }); | ||
| 225 | } | ||
| 226 | |||
| 227 | Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | ||
| 228 | if (!associated_insts) { | ||
| 229 | return nullptr; | ||
| 230 | } | ||
| 231 | switch (opcode) { | ||
| 232 | case Opcode::GetZeroFromOp: | ||
| 233 | CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp); | ||
| 234 | return associated_insts->zero_inst; | ||
| 235 | case Opcode::GetSignFromOp: | ||
| 236 | CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp); | ||
| 237 | return associated_insts->sign_inst; | ||
| 238 | case Opcode::GetCarryFromOp: | ||
| 239 | CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp); | ||
| 240 | return associated_insts->carry_inst; | ||
| 241 | case Opcode::GetOverflowFromOp: | ||
| 242 | CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 243 | return associated_insts->overflow_inst; | ||
| 244 | case Opcode::GetSparseFromOp: | ||
| 245 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||
| 246 | return associated_insts->sparse_inst; | ||
| 247 | case Opcode::GetInBoundsFromOp: | ||
| 248 | CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 249 | return associated_insts->in_bounds_inst; | ||
| 250 | default: | ||
| 251 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | IR::Type Inst::Type() const { | ||
| 256 | return TypeOf(op); | ||
| 257 | } | ||
| 258 | |||
| 259 | void Inst::SetArg(size_t index, Value value) { | ||
| 260 | if (index >= NumArgs()) { | ||
| 261 | throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op); | ||
| 262 | } | ||
| 263 | const IR::Value arg{Arg(index)}; | ||
| 264 | if (!arg.IsImmediate()) { | ||
| 265 | UndoUse(arg); | ||
| 266 | } | ||
| 267 | if (!value.IsImmediate()) { | ||
| 268 | Use(value); | ||
| 269 | } | ||
| 270 | if (op == Opcode::Phi) { | ||
| 271 | phi_args[index].second = value; | ||
| 272 | } else { | ||
| 273 | args[index] = value; | ||
| 274 | } | ||
| 275 | } | ||
| 276 | |||
| 277 | Block* Inst::PhiBlock(size_t index) const { | ||
| 278 | if (op != Opcode::Phi) { | ||
| 279 | throw LogicError("{} is not a Phi instruction", op); | ||
| 280 | } | ||
| 281 | if (index >= phi_args.size()) { | ||
| 282 | throw InvalidArgument("Out of bounds argument index {} in phi instruction", index); | ||
| 283 | } | ||
| 284 | return phi_args[index].first; | ||
| 285 | } | ||
| 286 | |||
| 287 | void Inst::AddPhiOperand(Block* predecessor, const Value& value) { | ||
| 288 | if (!value.IsImmediate()) { | ||
| 289 | Use(value); | ||
| 290 | } | ||
| 291 | phi_args.emplace_back(predecessor, value); | ||
| 292 | } | ||
| 293 | |||
| 294 | void Inst::Invalidate() { | ||
| 295 | ClearArgs(); | ||
| 296 | ReplaceOpcode(Opcode::Void); | ||
| 297 | } | ||
| 298 | |||
| 299 | void Inst::ClearArgs() { | ||
| 300 | if (op == Opcode::Phi) { | ||
| 301 | for (auto& pair : phi_args) { | ||
| 302 | IR::Value& value{pair.second}; | ||
| 303 | if (!value.IsImmediate()) { | ||
| 304 | UndoUse(value); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | phi_args.clear(); | ||
| 308 | } else { | ||
| 309 | for (auto& value : args) { | ||
| 310 | if (!value.IsImmediate()) { | ||
| 311 | UndoUse(value); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | // Reset arguments to null | ||
| 315 | // std::memset was measured to be faster on MSVC than std::ranges::fill | ||
| 316 | std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args)); | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
| 320 | void Inst::ReplaceUsesWith(Value replacement) { | ||
| 321 | Invalidate(); | ||
| 322 | ReplaceOpcode(Opcode::Identity); | ||
| 323 | if (!replacement.IsImmediate()) { | ||
| 324 | Use(replacement); | ||
| 325 | } | ||
| 326 | args[0] = replacement; | ||
| 327 | } | ||
| 328 | |||
| 329 | void Inst::ReplaceOpcode(IR::Opcode opcode) { | ||
| 330 | if (opcode == IR::Opcode::Phi) { | ||
| 331 | throw LogicError("Cannot transition into Phi"); | ||
| 332 | } | ||
| 333 | if (op == Opcode::Phi) { | ||
| 334 | // Transition out of phi arguments into non-phi | ||
| 335 | std::destroy_at(&phi_args); | ||
| 336 | std::construct_at(&args); | ||
| 337 | } | ||
| 338 | op = opcode; | ||
| 339 | } | ||
| 340 | |||
| 341 | void Inst::Use(const Value& value) { | ||
| 342 | Inst* const inst{value.Inst()}; | ||
| 343 | ++inst->use_count; | ||
| 344 | |||
| 345 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 346 | switch (op) { | ||
| 347 | case Opcode::GetZeroFromOp: | ||
| 348 | AllocAssociatedInsts(assoc_inst); | ||
| 349 | SetPseudoInstruction(assoc_inst->zero_inst, this); | ||
| 350 | break; | ||
| 351 | case Opcode::GetSignFromOp: | ||
| 352 | AllocAssociatedInsts(assoc_inst); | ||
| 353 | SetPseudoInstruction(assoc_inst->sign_inst, this); | ||
| 354 | break; | ||
| 355 | case Opcode::GetCarryFromOp: | ||
| 356 | AllocAssociatedInsts(assoc_inst); | ||
| 357 | SetPseudoInstruction(assoc_inst->carry_inst, this); | ||
| 358 | break; | ||
| 359 | case Opcode::GetOverflowFromOp: | ||
| 360 | AllocAssociatedInsts(assoc_inst); | ||
| 361 | SetPseudoInstruction(assoc_inst->overflow_inst, this); | ||
| 362 | break; | ||
| 363 | case Opcode::GetSparseFromOp: | ||
| 364 | AllocAssociatedInsts(assoc_inst); | ||
| 365 | SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||
| 366 | break; | ||
| 367 | case Opcode::GetInBoundsFromOp: | ||
| 368 | AllocAssociatedInsts(assoc_inst); | ||
| 369 | SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||
| 370 | break; | ||
| 371 | default: | ||
| 372 | break; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | void Inst::UndoUse(const Value& value) { | ||
| 377 | Inst* const inst{value.Inst()}; | ||
| 378 | --inst->use_count; | ||
| 379 | |||
| 380 | std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts}; | ||
| 381 | switch (op) { | ||
| 382 | case Opcode::GetZeroFromOp: | ||
| 383 | AllocAssociatedInsts(assoc_inst); | ||
| 384 | RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp); | ||
| 385 | break; | ||
| 386 | case Opcode::GetSignFromOp: | ||
| 387 | AllocAssociatedInsts(assoc_inst); | ||
| 388 | RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp); | ||
| 389 | break; | ||
| 390 | case Opcode::GetCarryFromOp: | ||
| 391 | AllocAssociatedInsts(assoc_inst); | ||
| 392 | RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp); | ||
| 393 | break; | ||
| 394 | case Opcode::GetOverflowFromOp: | ||
| 395 | AllocAssociatedInsts(assoc_inst); | ||
| 396 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||
| 397 | break; | ||
| 398 | case Opcode::GetSparseFromOp: | ||
| 399 | AllocAssociatedInsts(assoc_inst); | ||
| 400 | RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp); | ||
| 401 | break; | ||
| 402 | case Opcode::GetInBoundsFromOp: | ||
| 403 | AllocAssociatedInsts(assoc_inst); | ||
| 404 | RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 405 | break; | ||
| 406 | default: | ||
| 407 | break; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | } // namespace Shader::IR | ||
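
Use and UndoUse above do double duty: alongside reference counting they register and unregister pseudo-instructions (GetZeroFromOp and friends) on the instruction whose flags they observe, which is what makes GetAssociatedPseudoOperation an O(1) lookup. A minimal sketch of a pass consuming that bookkeeping; the folding helper is hypothetical, and only the Inst methods are taken from this file:

    // Hypothetical constant-folding step: an IAdd32 may only be replaced by
    // an immediate when no live pseudo-instruction still reads its carry.
    void TryFoldIAdd32(IR::Inst& inst) {
        if (!inst.AreAllArgsImmediates()) {
            return;
        }
        if (inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp) != nullptr) {
            return; // a GetCarryFromOp user is alive; keep the add
        }
        const u32 result{inst.Arg(0).U32() + inst.Arg(1).U32()};
        inst.ReplaceUsesWith(IR::Value{result});
    }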
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h new file mode 100644 index 000000000..77cda1f8a --- /dev/null +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/shader_info.h" | ||
| 10 | |||
| 11 | namespace Shader::IR { | ||
| 12 | |||
| 13 | enum class FmzMode : u8 { | ||
| 14 | DontCare, // Not specified for this instruction | ||
| 15 | FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) | ||
| 16 | FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9) | ||
| 17 | None, // Denorms are not flushed, NAN is propagated (nouveau) | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class FpRounding : u8 { | ||
| 21 | DontCare, // Not specified for this instruction | ||
| 22 | RN, // Round to nearest even | ||
| 23 | RM, // Round towards negative infinity | ||
| 24 | RP, // Round towards positive infinity | ||
| 25 | RZ, // Round towards zero | ||
| 26 | }; | ||
| 27 | |||
| 28 | struct FpControl { | ||
| 29 | bool no_contraction{false}; | ||
| 30 | FpRounding rounding{FpRounding::DontCare}; | ||
| 31 | FmzMode fmz_mode{FmzMode::DontCare}; | ||
| 32 | }; | ||
| 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); | ||
| 34 | |||
| 35 | union TextureInstInfo { | ||
| 36 | u32 raw; | ||
| 37 | BitField<0, 16, u32> descriptor_index; | ||
| 38 | BitField<16, 3, TextureType> type; | ||
| 39 | BitField<19, 1, u32> is_depth; | ||
| 40 | BitField<20, 1, u32> has_bias; | ||
| 41 | BitField<21, 1, u32> has_lod_clamp; | ||
| 42 | BitField<22, 1, u32> relaxed_precision; | ||
| 43 | BitField<23, 2, u32> gather_component; | ||
| 44 | BitField<25, 2, u32> num_derivates; | ||
| 45 | BitField<27, 3, ImageFormat> image_format; | ||
| 46 | }; | ||
| 47 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | ||
| 48 | |||
| 49 | } // namespace Shader::IR | ||
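
TextureInstInfo packs every per-instruction texture attribute into one u32 so it can ride in the instruction's flags slot; the static_assert enforces that budget. A short usage sketch, assuming a Color2D enumerator from shader_info.h:

    IR::TextureInstInfo info{};
    info.descriptor_index.Assign(3);          // which texture descriptor
    info.type.Assign(TextureType::Color2D);   // assumed enumerator
    info.has_bias.Assign(1);
    const u32 packed{info.raw};               // stored on the instruction
    // Backends rebuild the union and read the fields back out:
    IR::TextureInstInfo read{.raw = packed};
    const bool biased{read.has_bias != 0};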
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp new file mode 100644 index 000000000..24d024ad7 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.cpp | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | std::string_view NameOf(Opcode op) { | ||
| 12 | return Detail::META_TABLE[static_cast<size_t>(op)].name; | ||
| 13 | } | ||
| 14 | |||
| 15 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h new file mode 100644 index 000000000..9ab108292 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.h | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <string_view> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 14 | |||
| 15 | namespace Shader::IR { | ||
| 16 | |||
| 17 | enum class Opcode { | ||
| 18 | #define OPCODE(name, ...) name, | ||
| 19 | #include "opcodes.inc" | ||
| 20 | #undef OPCODE | ||
| 21 | }; | ||
| 22 | |||
| 23 | namespace Detail { | ||
| 24 | struct OpcodeMeta { | ||
| 25 | std::string_view name; | ||
| 26 | Type type; | ||
| 27 | std::array<Type, 5> arg_types; | ||
| 28 | }; | ||
| 29 | |||
| 30 | // TODO: use 'using enum Type;' once all supported compilers implement C++20 using-enum | ||
| 31 | constexpr Type Void{Type::Void}; | ||
| 32 | constexpr Type Opaque{Type::Opaque}; | ||
| 33 | constexpr Type Reg{Type::Reg}; | ||
| 34 | constexpr Type Pred{Type::Pred}; | ||
| 35 | constexpr Type Attribute{Type::Attribute}; | ||
| 36 | constexpr Type Patch{Type::Patch}; | ||
| 37 | constexpr Type U1{Type::U1}; | ||
| 38 | constexpr Type U8{Type::U8}; | ||
| 39 | constexpr Type U16{Type::U16}; | ||
| 40 | constexpr Type U32{Type::U32}; | ||
| 41 | constexpr Type U64{Type::U64}; | ||
| 42 | constexpr Type F16{Type::F16}; | ||
| 43 | constexpr Type F32{Type::F32}; | ||
| 44 | constexpr Type F64{Type::F64}; | ||
| 45 | constexpr Type U32x2{Type::U32x2}; | ||
| 46 | constexpr Type U32x3{Type::U32x3}; | ||
| 47 | constexpr Type U32x4{Type::U32x4}; | ||
| 48 | constexpr Type F16x2{Type::F16x2}; | ||
| 49 | constexpr Type F16x3{Type::F16x3}; | ||
| 50 | constexpr Type F16x4{Type::F16x4}; | ||
| 51 | constexpr Type F32x2{Type::F32x2}; | ||
| 52 | constexpr Type F32x3{Type::F32x3}; | ||
| 53 | constexpr Type F32x4{Type::F32x4}; | ||
| 54 | constexpr Type F64x2{Type::F64x2}; | ||
| 55 | constexpr Type F64x3{Type::F64x3}; | ||
| 56 | constexpr Type F64x4{Type::F64x4}; | ||
| 57 | |||
| 58 | constexpr OpcodeMeta META_TABLE[]{ | ||
| 59 | #define OPCODE(name_token, type_token, ...) \ | ||
| 60 | { \ | ||
| 61 | .name{#name_token}, \ | ||
| 62 | .type = type_token, \ | ||
| 63 | .arg_types{__VA_ARGS__}, \ | ||
| 64 | }, | ||
| 65 | #include "opcodes.inc" | ||
| 66 | #undef OPCODE | ||
| 67 | }; | ||
| 68 | constexpr size_t CalculateNumArgsOf(Opcode op) { | ||
| 69 | const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types}; | ||
| 70 | return static_cast<size_t>( | ||
| 71 | std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void))); | ||
| 72 | } | ||
| 73 | |||
| 74 | constexpr u8 NUM_ARGS[]{ | ||
| 75 | #define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)), | ||
| 76 | #include "opcodes.inc" | ||
| 77 | #undef OPCODE | ||
| 78 | }; | ||
| 79 | } // namespace Detail | ||
| 80 | |||
| 81 | /// Get return type of an opcode | ||
| 82 | [[nodiscard]] inline Type TypeOf(Opcode op) noexcept { | ||
| 83 | return Detail::META_TABLE[static_cast<size_t>(op)].type; | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Get the number of arguments an opcode accepts | ||
| 87 | [[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept { | ||
| 88 | return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]); | ||
| 89 | } | ||
| 90 | |||
| 91 | /// Get the required type of an argument of an opcode | ||
| 92 | [[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept { | ||
| 93 | return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index]; | ||
| 94 | } | ||
| 95 | |||
| 96 | /// Get the name of an opcode | ||
| 97 | [[nodiscard]] std::string_view NameOf(Opcode op); | ||
| 98 | |||
| 99 | } // namespace Shader::IR | ||
| 100 | |||
| 101 | template <> | ||
| 102 | struct fmt::formatter<Shader::IR::Opcode> { | ||
| 103 | constexpr auto parse(format_parse_context& ctx) { | ||
| 104 | return ctx.begin(); | ||
| 105 | } | ||
| 106 | template <typename FormatContext> | ||
| 107 | auto format(const Shader::IR::Opcode& op, FormatContext& ctx) { | ||
| 108 | return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(op)); | ||
| 109 | } | ||
| 110 | }; | ||
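
opcodes.h is a textbook X macro: opcodes.inc is included three times, and each inclusion redefines OPCODE so every row expands into a different artifact (an enumerator, a META_TABLE entry, a NUM_ARGS count). The idiom reduced to a self-contained illustration, using a hypothetical two-opcode list:

    #define MY_OPCODES(X) \
        X(IAdd32, 2)      \
        X(INeg32, 1)

    enum class MyOp {
    #define OPCODE(name, num_args) name,
        MY_OPCODES(OPCODE)
    #undef OPCODE
    };

    constexpr int MY_NUM_ARGS[]{
    #define OPCODE(name, num_args) num_args,
        MY_OPCODES(OPCODE)
    #undef OPCODE
    };
    static_assert(MY_NUM_ARGS[static_cast<int>(MyOp::INeg32)] == 1);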
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc new file mode 100644 index 000000000..d91098c80 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -0,0 +1,550 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ... | ||
| 6 | OPCODE(Phi, Opaque, ) | ||
| 7 | OPCODE(Identity, Opaque, Opaque, ) | ||
| 8 | OPCODE(Void, Void, ) | ||
| 9 | OPCODE(ConditionRef, U1, U1, ) | ||
| 10 | OPCODE(Reference, Void, Opaque, ) | ||
| 11 | OPCODE(PhiMove, Void, Opaque, Opaque, ) | ||
| 12 | |||
| 13 | // Special operations | ||
| 14 | OPCODE(Prologue, Void, ) | ||
| 15 | OPCODE(Epilogue, Void, ) | ||
| 16 | OPCODE(Join, Void, ) | ||
| 17 | OPCODE(DemoteToHelperInvocation, Void, ) | ||
| 18 | OPCODE(EmitVertex, Void, U32, ) | ||
| 19 | OPCODE(EndPrimitive, Void, U32, ) | ||
| 20 | |||
| 21 | // Barriers | ||
| 22 | OPCODE(Barrier, Void, ) | ||
| 23 | OPCODE(WorkgroupMemoryBarrier, Void, ) | ||
| 24 | OPCODE(DeviceMemoryBarrier, Void, ) | ||
| 25 | |||
| 26 | // Context getters/setters | ||
| 27 | OPCODE(GetRegister, U32, Reg, ) | ||
| 28 | OPCODE(SetRegister, Void, Reg, U32, ) | ||
| 29 | OPCODE(GetPred, U1, Pred, ) | ||
| 30 | OPCODE(SetPred, Void, Pred, U1, ) | ||
| 31 | OPCODE(GetGotoVariable, U1, U32, ) | ||
| 32 | OPCODE(SetGotoVariable, Void, U32, U1, ) | ||
| 33 | OPCODE(GetIndirectBranchVariable, U32, ) | ||
| 34 | OPCODE(SetIndirectBranchVariable, Void, U32, ) | ||
| 35 | OPCODE(GetCbufU8, U32, U32, U32, ) | ||
| 36 | OPCODE(GetCbufS8, U32, U32, U32, ) | ||
| 37 | OPCODE(GetCbufU16, U32, U32, U32, ) | ||
| 38 | OPCODE(GetCbufS16, U32, U32, U32, ) | ||
| 39 | OPCODE(GetCbufU32, U32, U32, U32, ) | ||
| 40 | OPCODE(GetCbufF32, F32, U32, U32, ) | ||
| 41 | OPCODE(GetCbufU32x2, U32x2, U32, U32, ) | ||
| 42 | OPCODE(GetAttribute, F32, Attribute, U32, ) | ||
| 43 | OPCODE(SetAttribute, Void, Attribute, F32, U32, ) | ||
| 44 | OPCODE(GetAttributeIndexed, F32, U32, U32, ) | ||
| 45 | OPCODE(SetAttributeIndexed, Void, U32, F32, U32, ) | ||
| 46 | OPCODE(GetPatch, F32, Patch, ) | ||
| 47 | OPCODE(SetPatch, Void, Patch, F32, ) | ||
| 48 | OPCODE(SetFragColor, Void, U32, U32, F32, ) | ||
| 49 | OPCODE(SetSampleMask, Void, U32, ) | ||
| 50 | OPCODE(SetFragDepth, Void, F32, ) | ||
| 51 | OPCODE(GetZFlag, U1, Void, ) | ||
| 52 | OPCODE(GetSFlag, U1, Void, ) | ||
| 53 | OPCODE(GetCFlag, U1, Void, ) | ||
| 54 | OPCODE(GetOFlag, U1, Void, ) | ||
| 55 | OPCODE(SetZFlag, Void, U1, ) | ||
| 56 | OPCODE(SetSFlag, Void, U1, ) | ||
| 57 | OPCODE(SetCFlag, Void, U1, ) | ||
| 58 | OPCODE(SetOFlag, Void, U1, ) | ||
| 59 | OPCODE(WorkgroupId, U32x3, ) | ||
| 60 | OPCODE(LocalInvocationId, U32x3, ) | ||
| 61 | OPCODE(InvocationId, U32, ) | ||
| 62 | OPCODE(SampleId, U32, ) | ||
| 63 | OPCODE(IsHelperInvocation, U1, ) | ||
| 64 | OPCODE(YDirection, F32, ) | ||
| 65 | |||
| 66 | // Undefined | ||
| 67 | OPCODE(UndefU1, U1, ) | ||
| 68 | OPCODE(UndefU8, U8, ) | ||
| 69 | OPCODE(UndefU16, U16, ) | ||
| 70 | OPCODE(UndefU32, U32, ) | ||
| 71 | OPCODE(UndefU64, U64, ) | ||
| 72 | |||
| 73 | // Memory operations | ||
| 74 | OPCODE(LoadGlobalU8, U32, Opaque, ) | ||
| 75 | OPCODE(LoadGlobalS8, U32, Opaque, ) | ||
| 76 | OPCODE(LoadGlobalU16, U32, Opaque, ) | ||
| 77 | OPCODE(LoadGlobalS16, U32, Opaque, ) | ||
| 78 | OPCODE(LoadGlobal32, U32, Opaque, ) | ||
| 79 | OPCODE(LoadGlobal64, U32x2, Opaque, ) | ||
| 80 | OPCODE(LoadGlobal128, U32x4, Opaque, ) | ||
| 81 | OPCODE(WriteGlobalU8, Void, Opaque, U32, ) | ||
| 82 | OPCODE(WriteGlobalS8, Void, Opaque, U32, ) | ||
| 83 | OPCODE(WriteGlobalU16, Void, Opaque, U32, ) | ||
| 84 | OPCODE(WriteGlobalS16, Void, Opaque, U32, ) | ||
| 85 | OPCODE(WriteGlobal32, Void, Opaque, U32, ) | ||
| 86 | OPCODE(WriteGlobal64, Void, Opaque, U32x2, ) | ||
| 87 | OPCODE(WriteGlobal128, Void, Opaque, U32x4, ) | ||
| 88 | |||
| 89 | // Storage buffer operations | ||
| 90 | OPCODE(LoadStorageU8, U32, U32, U32, ) | ||
| 91 | OPCODE(LoadStorageS8, U32, U32, U32, ) | ||
| 92 | OPCODE(LoadStorageU16, U32, U32, U32, ) | ||
| 93 | OPCODE(LoadStorageS16, U32, U32, U32, ) | ||
| 94 | OPCODE(LoadStorage32, U32, U32, U32, ) | ||
| 95 | OPCODE(LoadStorage64, U32x2, U32, U32, ) | ||
| 96 | OPCODE(LoadStorage128, U32x4, U32, U32, ) | ||
| 97 | OPCODE(WriteStorageU8, Void, U32, U32, U32, ) | ||
| 98 | OPCODE(WriteStorageS8, Void, U32, U32, U32, ) | ||
| 99 | OPCODE(WriteStorageU16, Void, U32, U32, U32, ) | ||
| 100 | OPCODE(WriteStorageS16, Void, U32, U32, U32, ) | ||
| 101 | OPCODE(WriteStorage32, Void, U32, U32, U32, ) | ||
| 102 | OPCODE(WriteStorage64, Void, U32, U32, U32x2, ) | ||
| 103 | OPCODE(WriteStorage128, Void, U32, U32, U32x4, ) | ||
| 104 | |||
| 105 | // Local memory operations | ||
| 106 | OPCODE(LoadLocal, U32, U32, ) | ||
| 107 | OPCODE(WriteLocal, Void, U32, U32, ) | ||
| 108 | |||
| 109 | // Shared memory operations | ||
| 110 | OPCODE(LoadSharedU8, U32, U32, ) | ||
| 111 | OPCODE(LoadSharedS8, U32, U32, ) | ||
| 112 | OPCODE(LoadSharedU16, U32, U32, ) | ||
| 113 | OPCODE(LoadSharedS16, U32, U32, ) | ||
| 114 | OPCODE(LoadSharedU32, U32, U32, ) | ||
| 115 | OPCODE(LoadSharedU64, U32x2, U32, ) | ||
| 116 | OPCODE(LoadSharedU128, U32x4, U32, ) | ||
| 117 | OPCODE(WriteSharedU8, Void, U32, U32, ) | ||
| 118 | OPCODE(WriteSharedU16, Void, U32, U32, ) | ||
| 119 | OPCODE(WriteSharedU32, Void, U32, U32, ) | ||
| 120 | OPCODE(WriteSharedU64, Void, U32, U32x2, ) | ||
| 121 | OPCODE(WriteSharedU128, Void, U32, U32x4, ) | ||
| 122 | |||
| 123 | // Vector utility | ||
| 124 | OPCODE(CompositeConstructU32x2, U32x2, U32, U32, ) | ||
| 125 | OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, ) | ||
| 126 | OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, ) | ||
| 127 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | ||
| 128 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | ||
| 129 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | ||
| 130 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | ||
| 131 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | ||
| 132 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | ||
| 133 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | ||
| 134 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | ||
| 135 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | ||
| 136 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | ||
| 137 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | ||
| 138 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | ||
| 139 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | ||
| 140 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | ||
| 141 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | ||
| 142 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | ||
| 143 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | ||
| 144 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | ||
| 145 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | ||
| 146 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | ||
| 147 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | ||
| 148 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | ||
| 149 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | ||
| 150 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | ||
| 151 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | ||
| 152 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | ||
| 153 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | ||
| 154 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | ||
| 155 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | ||
| 156 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | ||
| 157 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | ||
| 158 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | ||
| 159 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | ||
| 160 | |||
| 161 | // Select operations | ||
| 162 | OPCODE(SelectU1, U1, U1, U1, U1, ) | ||
| 163 | OPCODE(SelectU8, U8, U1, U8, U8, ) | ||
| 164 | OPCODE(SelectU16, U16, U1, U16, U16, ) | ||
| 165 | OPCODE(SelectU32, U32, U1, U32, U32, ) | ||
| 166 | OPCODE(SelectU64, U64, U1, U64, U64, ) | ||
| 167 | OPCODE(SelectF16, F16, U1, F16, F16, ) | ||
| 168 | OPCODE(SelectF32, F32, U1, F32, F32, ) | ||
| 169 | OPCODE(SelectF64, F64, U1, F64, F64, ) | ||
| 170 | |||
| 171 | // Bitwise conversions | ||
| 172 | OPCODE(BitCastU16F16, U16, F16, ) | ||
| 173 | OPCODE(BitCastU32F32, U32, F32, ) | ||
| 174 | OPCODE(BitCastU64F64, U64, F64, ) | ||
| 175 | OPCODE(BitCastF16U16, F16, U16, ) | ||
| 176 | OPCODE(BitCastF32U32, F32, U32, ) | ||
| 177 | OPCODE(BitCastF64U64, F64, U64, ) | ||
| 178 | OPCODE(PackUint2x32, U64, U32x2, ) | ||
| 179 | OPCODE(UnpackUint2x32, U32x2, U64, ) | ||
| 180 | OPCODE(PackFloat2x16, U32, F16x2, ) | ||
| 181 | OPCODE(UnpackFloat2x16, F16x2, U32, ) | ||
| 182 | OPCODE(PackHalf2x16, U32, F32x2, ) | ||
| 183 | OPCODE(UnpackHalf2x16, F32x2, U32, ) | ||
| 184 | OPCODE(PackDouble2x32, F64, U32x2, ) | ||
| 185 | OPCODE(UnpackDouble2x32, U32x2, F64, ) | ||
| 186 | |||
| 187 | // Pseudo-operations, handled specially at final emit | ||
| 188 | OPCODE(GetZeroFromOp, U1, Opaque, ) | ||
| 189 | OPCODE(GetSignFromOp, U1, Opaque, ) | ||
| 190 | OPCODE(GetCarryFromOp, U1, Opaque, ) | ||
| 191 | OPCODE(GetOverflowFromOp, U1, Opaque, ) | ||
| 192 | OPCODE(GetSparseFromOp, U1, Opaque, ) | ||
| 193 | OPCODE(GetInBoundsFromOp, U1, Opaque, ) | ||
| 194 | |||
| 195 | // Floating-point operations | ||
| 196 | OPCODE(FPAbs16, F16, F16, ) | ||
| 197 | OPCODE(FPAbs32, F32, F32, ) | ||
| 198 | OPCODE(FPAbs64, F64, F64, ) | ||
| 199 | OPCODE(FPAdd16, F16, F16, F16, ) | ||
| 200 | OPCODE(FPAdd32, F32, F32, F32, ) | ||
| 201 | OPCODE(FPAdd64, F64, F64, F64, ) | ||
| 202 | OPCODE(FPFma16, F16, F16, F16, F16, ) | ||
| 203 | OPCODE(FPFma32, F32, F32, F32, F32, ) | ||
| 204 | OPCODE(FPFma64, F64, F64, F64, F64, ) | ||
| 205 | OPCODE(FPMax32, F32, F32, F32, ) | ||
| 206 | OPCODE(FPMax64, F64, F64, F64, ) | ||
| 207 | OPCODE(FPMin32, F32, F32, F32, ) | ||
| 208 | OPCODE(FPMin64, F64, F64, F64, ) | ||
| 209 | OPCODE(FPMul16, F16, F16, F16, ) | ||
| 210 | OPCODE(FPMul32, F32, F32, F32, ) | ||
| 211 | OPCODE(FPMul64, F64, F64, F64, ) | ||
| 212 | OPCODE(FPNeg16, F16, F16, ) | ||
| 213 | OPCODE(FPNeg32, F32, F32, ) | ||
| 214 | OPCODE(FPNeg64, F64, F64, ) | ||
| 215 | OPCODE(FPRecip32, F32, F32, ) | ||
| 216 | OPCODE(FPRecip64, F64, F64, ) | ||
| 217 | OPCODE(FPRecipSqrt32, F32, F32, ) | ||
| 218 | OPCODE(FPRecipSqrt64, F64, F64, ) | ||
| 219 | OPCODE(FPSqrt, F32, F32, ) | ||
| 220 | OPCODE(FPSin, F32, F32, ) | ||
| 221 | OPCODE(FPExp2, F32, F32, ) | ||
| 222 | OPCODE(FPCos, F32, F32, ) | ||
| 223 | OPCODE(FPLog2, F32, F32, ) | ||
| 224 | OPCODE(FPSaturate16, F16, F16, ) | ||
| 225 | OPCODE(FPSaturate32, F32, F32, ) | ||
| 226 | OPCODE(FPSaturate64, F64, F64, ) | ||
| 227 | OPCODE(FPClamp16, F16, F16, F16, F16, ) | ||
| 228 | OPCODE(FPClamp32, F32, F32, F32, F32, ) | ||
| 229 | OPCODE(FPClamp64, F64, F64, F64, F64, ) | ||
| 230 | OPCODE(FPRoundEven16, F16, F16, ) | ||
| 231 | OPCODE(FPRoundEven32, F32, F32, ) | ||
| 232 | OPCODE(FPRoundEven64, F64, F64, ) | ||
| 233 | OPCODE(FPFloor16, F16, F16, ) | ||
| 234 | OPCODE(FPFloor32, F32, F32, ) | ||
| 235 | OPCODE(FPFloor64, F64, F64, ) | ||
| 236 | OPCODE(FPCeil16, F16, F16, ) | ||
| 237 | OPCODE(FPCeil32, F32, F32, ) | ||
| 238 | OPCODE(FPCeil64, F64, F64, ) | ||
| 239 | OPCODE(FPTrunc16, F16, F16, ) | ||
| 240 | OPCODE(FPTrunc32, F32, F32, ) | ||
| 241 | OPCODE(FPTrunc64, F64, F64, ) | ||
| 242 | |||
| 243 | OPCODE(FPOrdEqual16, U1, F16, F16, ) | ||
| 244 | OPCODE(FPOrdEqual32, U1, F32, F32, ) | ||
| 245 | OPCODE(FPOrdEqual64, U1, F64, F64, ) | ||
| 246 | OPCODE(FPUnordEqual16, U1, F16, F16, ) | ||
| 247 | OPCODE(FPUnordEqual32, U1, F32, F32, ) | ||
| 248 | OPCODE(FPUnordEqual64, U1, F64, F64, ) | ||
| 249 | OPCODE(FPOrdNotEqual16, U1, F16, F16, ) | ||
| 250 | OPCODE(FPOrdNotEqual32, U1, F32, F32, ) | ||
| 251 | OPCODE(FPOrdNotEqual64, U1, F64, F64, ) | ||
| 252 | OPCODE(FPUnordNotEqual16, U1, F16, F16, ) | ||
| 253 | OPCODE(FPUnordNotEqual32, U1, F32, F32, ) | ||
| 254 | OPCODE(FPUnordNotEqual64, U1, F64, F64, ) | ||
| 255 | OPCODE(FPOrdLessThan16, U1, F16, F16, ) | ||
| 256 | OPCODE(FPOrdLessThan32, U1, F32, F32, ) | ||
| 257 | OPCODE(FPOrdLessThan64, U1, F64, F64, ) | ||
| 258 | OPCODE(FPUnordLessThan16, U1, F16, F16, ) | ||
| 259 | OPCODE(FPUnordLessThan32, U1, F32, F32, ) | ||
| 260 | OPCODE(FPUnordLessThan64, U1, F64, F64, ) | ||
| 261 | OPCODE(FPOrdGreaterThan16, U1, F16, F16, ) | ||
| 262 | OPCODE(FPOrdGreaterThan32, U1, F32, F32, ) | ||
| 263 | OPCODE(FPOrdGreaterThan64, U1, F64, F64, ) | ||
| 264 | OPCODE(FPUnordGreaterThan16, U1, F16, F16, ) | ||
| 265 | OPCODE(FPUnordGreaterThan32, U1, F32, F32, ) | ||
| 266 | OPCODE(FPUnordGreaterThan64, U1, F64, F64, ) | ||
| 267 | OPCODE(FPOrdLessThanEqual16, U1, F16, F16, ) | ||
| 268 | OPCODE(FPOrdLessThanEqual32, U1, F32, F32, ) | ||
| 269 | OPCODE(FPOrdLessThanEqual64, U1, F64, F64, ) | ||
| 270 | OPCODE(FPUnordLessThanEqual16, U1, F16, F16, ) | ||
| 271 | OPCODE(FPUnordLessThanEqual32, U1, F32, F32, ) | ||
| 272 | OPCODE(FPUnordLessThanEqual64, U1, F64, F64, ) | ||
| 273 | OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, ) | ||
| 274 | OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, ) | ||
| 275 | OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, ) | ||
| 276 | OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, ) | ||
| 277 | OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, ) | ||
| 278 | OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, ) | ||
| 279 | OPCODE(FPIsNan16, U1, F16, ) | ||
| 280 | OPCODE(FPIsNan32, U1, F32, ) | ||
| 281 | OPCODE(FPIsNan64, U1, F64, ) | ||
| 282 | |||
| 283 | // Integer operations | ||
| 284 | OPCODE(IAdd32, U32, U32, U32, ) | ||
| 285 | OPCODE(IAdd64, U64, U64, U64, ) | ||
| 286 | OPCODE(ISub32, U32, U32, U32, ) | ||
| 287 | OPCODE(ISub64, U64, U64, U64, ) | ||
| 288 | OPCODE(IMul32, U32, U32, U32, ) | ||
| 289 | OPCODE(INeg32, U32, U32, ) | ||
| 290 | OPCODE(INeg64, U64, U64, ) | ||
| 291 | OPCODE(IAbs32, U32, U32, ) | ||
| 292 | OPCODE(ShiftLeftLogical32, U32, U32, U32, ) | ||
| 293 | OPCODE(ShiftLeftLogical64, U64, U64, U32, ) | ||
| 294 | OPCODE(ShiftRightLogical32, U32, U32, U32, ) | ||
| 295 | OPCODE(ShiftRightLogical64, U64, U64, U32, ) | ||
| 296 | OPCODE(ShiftRightArithmetic32, U32, U32, U32, ) | ||
| 297 | OPCODE(ShiftRightArithmetic64, U64, U64, U32, ) | ||
| 298 | OPCODE(BitwiseAnd32, U32, U32, U32, ) | ||
| 299 | OPCODE(BitwiseOr32, U32, U32, U32, ) | ||
| 300 | OPCODE(BitwiseXor32, U32, U32, U32, ) | ||
| 301 | OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, ) | ||
| 302 | OPCODE(BitFieldSExtract, U32, U32, U32, U32, ) | ||
| 303 | OPCODE(BitFieldUExtract, U32, U32, U32, U32, ) | ||
| 304 | OPCODE(BitReverse32, U32, U32, ) | ||
| 305 | OPCODE(BitCount32, U32, U32, ) | ||
| 306 | OPCODE(BitwiseNot32, U32, U32, ) | ||
| 307 | |||
| 308 | OPCODE(FindSMsb32, U32, U32, ) | ||
| 309 | OPCODE(FindUMsb32, U32, U32, ) | ||
| 310 | OPCODE(SMin32, U32, U32, U32, ) | ||
| 311 | OPCODE(UMin32, U32, U32, U32, ) | ||
| 312 | OPCODE(SMax32, U32, U32, U32, ) | ||
| 313 | OPCODE(UMax32, U32, U32, U32, ) | ||
| 314 | OPCODE(SClamp32, U32, U32, U32, U32, ) | ||
| 315 | OPCODE(UClamp32, U32, U32, U32, U32, ) | ||
| 316 | OPCODE(SLessThan, U1, U32, U32, ) | ||
| 317 | OPCODE(ULessThan, U1, U32, U32, ) | ||
| 318 | OPCODE(IEqual, U1, U32, U32, ) | ||
| 319 | OPCODE(SLessThanEqual, U1, U32, U32, ) | ||
| 320 | OPCODE(ULessThanEqual, U1, U32, U32, ) | ||
| 321 | OPCODE(SGreaterThan, U1, U32, U32, ) | ||
| 322 | OPCODE(UGreaterThan, U1, U32, U32, ) | ||
| 323 | OPCODE(INotEqual, U1, U32, U32, ) | ||
| 324 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | ||
| 325 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | ||
| 326 | |||
| 327 | // Atomic operations | ||
| 328 | OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) | ||
| 329 | OPCODE(SharedAtomicSMin32, U32, U32, U32, ) | ||
| 330 | OPCODE(SharedAtomicUMin32, U32, U32, U32, ) | ||
| 331 | OPCODE(SharedAtomicSMax32, U32, U32, U32, ) | ||
| 332 | OPCODE(SharedAtomicUMax32, U32, U32, U32, ) | ||
| 333 | OPCODE(SharedAtomicInc32, U32, U32, U32, ) | ||
| 334 | OPCODE(SharedAtomicDec32, U32, U32, U32, ) | ||
| 335 | OPCODE(SharedAtomicAnd32, U32, U32, U32, ) | ||
| 336 | OPCODE(SharedAtomicOr32, U32, U32, U32, ) | ||
| 337 | OPCODE(SharedAtomicXor32, U32, U32, U32, ) | ||
| 338 | OPCODE(SharedAtomicExchange32, U32, U32, U32, ) | ||
| 339 | OPCODE(SharedAtomicExchange64, U64, U32, U64, ) | ||
| 340 | |||
| 341 | OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) | ||
| 342 | OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) | ||
| 343 | OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) | ||
| 344 | OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) | ||
| 345 | OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) | ||
| 346 | OPCODE(GlobalAtomicInc32, U32, U64, U32, ) | ||
| 347 | OPCODE(GlobalAtomicDec32, U32, U64, U32, ) | ||
| 348 | OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) | ||
| 349 | OPCODE(GlobalAtomicOr32, U32, U64, U32, ) | ||
| 350 | OPCODE(GlobalAtomicXor32, U32, U64, U32, ) | ||
| 351 | OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) | ||
| 352 | OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) | ||
| 353 | OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) | ||
| 354 | OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) | ||
| 355 | OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) | ||
| 356 | OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) | ||
| 357 | OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) | ||
| 358 | OPCODE(GlobalAtomicOr64, U64, U64, U64, ) | ||
| 359 | OPCODE(GlobalAtomicXor64, U64, U64, U64, ) | ||
| 360 | OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) | ||
| 361 | OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) | ||
| 362 | OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) | ||
| 363 | OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) | ||
| 364 | OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) | ||
| 365 | OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) | ||
| 366 | OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) | ||
| 367 | OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) | ||
| 368 | |||
| 369 | OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) | ||
| 370 | OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) | ||
| 371 | OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) | ||
| 372 | OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) | ||
| 373 | OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) | ||
| 374 | OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) | ||
| 375 | OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) | ||
| 376 | OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) | ||
| 377 | OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) | ||
| 378 | OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) | ||
| 379 | OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) | ||
| 380 | OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) | ||
| 381 | OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) | ||
| 382 | OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) | ||
| 383 | OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) | ||
| 384 | OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) | ||
| 385 | OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) | ||
| 386 | OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) | ||
| 387 | OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) | ||
| 388 | OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) | ||
| 389 | OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) | ||
| 390 | OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) | ||
| 391 | OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) | ||
| 392 | OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) | ||
| 393 | OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) | ||
| 394 | OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) | ||
| 395 | OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) | ||
| 396 | |||
| 397 | // Logical operations | ||
| 398 | OPCODE(LogicalOr, U1, U1, U1, ) | ||
| 399 | OPCODE(LogicalAnd, U1, U1, U1, ) | ||
| 400 | OPCODE(LogicalXor, U1, U1, U1, ) | ||
| 401 | OPCODE(LogicalNot, U1, U1, ) | ||
| 402 | |||
| 403 | // Conversion operations | ||
| 404 | OPCODE(ConvertS16F16, U32, F16, ) | ||
| 405 | OPCODE(ConvertS16F32, U32, F32, ) | ||
| 406 | OPCODE(ConvertS16F64, U32, F64, ) | ||
| 407 | OPCODE(ConvertS32F16, U32, F16, ) | ||
| 408 | OPCODE(ConvertS32F32, U32, F32, ) | ||
| 409 | OPCODE(ConvertS32F64, U32, F64, ) | ||
| 410 | OPCODE(ConvertS64F16, U64, F16, ) | ||
| 411 | OPCODE(ConvertS64F32, U64, F32, ) | ||
| 412 | OPCODE(ConvertS64F64, U64, F64, ) | ||
| 413 | OPCODE(ConvertU16F16, U32, F16, ) | ||
| 414 | OPCODE(ConvertU16F32, U32, F32, ) | ||
| 415 | OPCODE(ConvertU16F64, U32, F64, ) | ||
| 416 | OPCODE(ConvertU32F16, U32, F16, ) | ||
| 417 | OPCODE(ConvertU32F32, U32, F32, ) | ||
| 418 | OPCODE(ConvertU32F64, U32, F64, ) | ||
| 419 | OPCODE(ConvertU64F16, U64, F16, ) | ||
| 420 | OPCODE(ConvertU64F32, U64, F32, ) | ||
| 421 | OPCODE(ConvertU64F64, U64, F64, ) | ||
| 422 | OPCODE(ConvertU64U32, U64, U32, ) | ||
| 423 | OPCODE(ConvertU32U64, U32, U64, ) | ||
| 424 | OPCODE(ConvertF16F32, F16, F32, ) | ||
| 425 | OPCODE(ConvertF32F16, F32, F16, ) | ||
| 426 | OPCODE(ConvertF32F64, F32, F64, ) | ||
| 427 | OPCODE(ConvertF64F32, F64, F32, ) | ||
| 428 | OPCODE(ConvertF16S8, F16, U32, ) | ||
| 429 | OPCODE(ConvertF16S16, F16, U32, ) | ||
| 430 | OPCODE(ConvertF16S32, F16, U32, ) | ||
| 431 | OPCODE(ConvertF16S64, F16, U64, ) | ||
| 432 | OPCODE(ConvertF16U8, F16, U32, ) | ||
| 433 | OPCODE(ConvertF16U16, F16, U32, ) | ||
| 434 | OPCODE(ConvertF16U32, F16, U32, ) | ||
| 435 | OPCODE(ConvertF16U64, F16, U64, ) | ||
| 436 | OPCODE(ConvertF32S8, F32, U32, ) | ||
| 437 | OPCODE(ConvertF32S16, F32, U32, ) | ||
| 438 | OPCODE(ConvertF32S32, F32, U32, ) | ||
| 439 | OPCODE(ConvertF32S64, F32, U64, ) | ||
| 440 | OPCODE(ConvertF32U8, F32, U32, ) | ||
| 441 | OPCODE(ConvertF32U16, F32, U32, ) | ||
| 442 | OPCODE(ConvertF32U32, F32, U32, ) | ||
| 443 | OPCODE(ConvertF32U64, F32, U64, ) | ||
| 444 | OPCODE(ConvertF64S8, F64, U32, ) | ||
| 445 | OPCODE(ConvertF64S16, F64, U32, ) | ||
| 446 | OPCODE(ConvertF64S32, F64, U32, ) | ||
| 447 | OPCODE(ConvertF64S64, F64, U64, ) | ||
| 448 | OPCODE(ConvertF64U8, F64, U32, ) | ||
| 449 | OPCODE(ConvertF64U16, F64, U32, ) | ||
| 450 | OPCODE(ConvertF64U32, F64, U32, ) | ||
| 451 | OPCODE(ConvertF64U64, F64, U64, ) | ||
| 452 | |||
| 453 | // Image operations | ||
| 454 | OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 455 | OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 456 | OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 457 | OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 458 | OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 459 | OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 460 | OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | ||
| 461 | OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, ) | ||
| 462 | OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, ) | ||
| 463 | OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | ||
| 464 | OPCODE(BindlessImageRead, U32x4, U32, Opaque, ) | ||
| 465 | OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, ) | ||
| 466 | |||
| 467 | OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 468 | OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 469 | OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 470 | OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | ||
| 471 | OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, ) | ||
| 472 | OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, ) | ||
| 473 | OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, ) | ||
| 474 | OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, ) | ||
| 475 | OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, ) | ||
| 476 | OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, ) | ||
| 477 | OPCODE(BoundImageRead, U32x4, U32, Opaque, ) | ||
| 478 | OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, ) | ||
| 479 | |||
| 480 | OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 481 | OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 482 | OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) | ||
| 483 | OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, ) | ||
| 484 | OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, ) | ||
| 485 | OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, ) | ||
| 486 | OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, ) | ||
| 487 | OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, ) | ||
| 488 | OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, ) | ||
| 489 | OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, ) | ||
| 490 | OPCODE(ImageRead, U32x4, Opaque, Opaque, ) | ||
| 491 | OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, ) | ||
| 492 | |||
| 493 | // Atomic Image operations | ||
| 494 | |||
| 495 | OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, ) | ||
| 496 | OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, ) | ||
| 497 | OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, ) | ||
| 498 | OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, ) | ||
| 499 | OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, ) | ||
| 500 | OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, ) | ||
| 501 | OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, ) | ||
| 502 | OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, ) | ||
| 503 | OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, ) | ||
| 504 | OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, ) | ||
| 505 | OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, ) | ||
| 506 | |||
| 507 | OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, ) | ||
| 508 | OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, ) | ||
| 509 | OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, ) | ||
| 510 | OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, ) | ||
| 511 | OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, ) | ||
| 512 | OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, ) | ||
| 513 | OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, ) | ||
| 514 | OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, ) | ||
| 515 | OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, ) | ||
| 516 | OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, ) | ||
| 517 | OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, ) | ||
| 518 | |||
| 519 | OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, ) | ||
| 520 | OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, ) | ||
| 521 | OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, ) | ||
| 522 | OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, ) | ||
| 523 | OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, ) | ||
| 524 | OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, ) | ||
| 525 | OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, ) | ||
| 526 | OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, ) | ||
| 527 | OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, ) | ||
| 528 | OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, ) | ||
| 529 | OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, ) | ||
| 530 | |||
| 531 | // Warp operations | ||
| 532 | OPCODE(LaneId, U32, ) | ||
| 533 | OPCODE(VoteAll, U1, U1, ) | ||
| 534 | OPCODE(VoteAny, U1, U1, ) | ||
| 535 | OPCODE(VoteEqual, U1, U1, ) | ||
| 536 | OPCODE(SubgroupBallot, U32, U1, ) | ||
| 537 | OPCODE(SubgroupEqMask, U32, ) | ||
| 538 | OPCODE(SubgroupLtMask, U32, ) | ||
| 539 | OPCODE(SubgroupLeMask, U32, ) | ||
| 540 | OPCODE(SubgroupGtMask, U32, ) | ||
| 541 | OPCODE(SubgroupGeMask, U32, ) | ||
| 542 | OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) | ||
| 543 | OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) | ||
| 544 | OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) | ||
| 545 | OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) | ||
| 546 | OPCODE(FSwizzleAdd, F32, F32, F32, U32, ) | ||
| 547 | OPCODE(DPdxFine, F32, F32, ) | ||
| 548 | OPCODE(DPdyFine, F32, F32, ) | ||
| 549 | OPCODE(DPdxCoarse, F32, F32, ) | ||
| 550 | OPCODE(DPdyCoarse, F32, F32, ) | ||
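
A detail worth calling out in the table above: the trailing empty argument slots are load-bearing. arg_types in opcodes.h is a std::array<Type, 5>, so unspecified entries value-initialize to Type::Void, and CalculateNumArgsOf counts arguments by locating the first Void:

    // OPCODE(IAdd32, U32, U32, U32, ) produces
    //   arg_types == {U32, U32, Void, Void, Void}
    // so the first Void sits at index 2 and the opcode takes two arguments:
    static_assert(Shader::IR::Detail::CalculateNumArgsOf(Shader::IR::Opcode::IAdd32) == 2);

The flip side is that opcodes spelled with an explicit Void argument, such as GetZFlag, also report zero arguments, since the Void sentinel is hit immediately.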
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp new file mode 100644 index 000000000..4c956a970 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.cpp | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/patch.h" | ||
| 7 | |||
| 8 | namespace Shader::IR { | ||
| 9 | |||
| 10 | bool IsGeneric(Patch patch) noexcept { | ||
| 11 | return patch >= Patch::Component0 && patch <= Patch::Component119; | ||
| 12 | } | ||
| 13 | |||
| 14 | u32 GenericPatchIndex(Patch patch) { | ||
| 15 | if (!IsGeneric(patch)) { | ||
| 16 | throw InvalidArgument("Patch {} is not generic", patch); | ||
| 17 | } | ||
| 18 | return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4; | ||
| 19 | } | ||
| 20 | |||
| 21 | u32 GenericPatchElement(Patch patch) { | ||
| 22 | if (!IsGeneric(patch)) { | ||
| 23 | throw InvalidArgument("Patch {} is not generic", patch); | ||
| 24 | } | ||
| 25 | return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4; | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader::IR | ||
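
Component0 through Component119 model 30 four-wide generic patch attributes, so attribute index and element fall out of plain division and remainder. A worked example as hypothetical test code:

    #include <cassert>
    // Patch::Component5 sits 5 slots past Component0:
    //   5 / 4 == 1  -> second generic patch attribute
    //   5 % 4 == 1  -> its second (.y) element
    assert(Shader::IR::GenericPatchIndex(Shader::IR::Patch::Component5) == 1);
    assert(Shader::IR::GenericPatchElement(Shader::IR::Patch::Component5) == 1);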
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h new file mode 100644 index 000000000..6d66ff0d6 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/patch.h | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | enum class Patch : u64 { | ||
| 12 | TessellationLodLeft, | ||
| 13 | TessellationLodTop, | ||
| 14 | TessellationLodRight, | ||
| 15 | TessellationLodBottom, | ||
| 16 | TessellationLodInteriorU, | ||
| 17 | TessellationLodInteriorV, | ||
| 18 | ComponentPadding0, | ||
| 19 | ComponentPadding1, | ||
| 20 | Component0, | ||
| 21 | Component1, | ||
| 22 | Component2, | ||
| 23 | Component3, | ||
| 24 | Component4, | ||
| 25 | Component5, | ||
| 26 | Component6, | ||
| 27 | Component7, | ||
| 28 | Component8, | ||
| 29 | Component9, | ||
| 30 | Component10, | ||
| 31 | Component11, | ||
| 32 | Component12, | ||
| 33 | Component13, | ||
| 34 | Component14, | ||
| 35 | Component15, | ||
| 36 | Component16, | ||
| 37 | Component17, | ||
| 38 | Component18, | ||
| 39 | Component19, | ||
| 40 | Component20, | ||
| 41 | Component21, | ||
| 42 | Component22, | ||
| 43 | Component23, | ||
| 44 | Component24, | ||
| 45 | Component25, | ||
| 46 | Component26, | ||
| 47 | Component27, | ||
| 48 | Component28, | ||
| 49 | Component29, | ||
| 50 | Component30, | ||
| 51 | Component31, | ||
| 52 | Component32, | ||
| 53 | Component33, | ||
| 54 | Component34, | ||
| 55 | Component35, | ||
| 56 | Component36, | ||
| 57 | Component37, | ||
| 58 | Component38, | ||
| 59 | Component39, | ||
| 60 | Component40, | ||
| 61 | Component41, | ||
| 62 | Component42, | ||
| 63 | Component43, | ||
| 64 | Component44, | ||
| 65 | Component45, | ||
| 66 | Component46, | ||
| 67 | Component47, | ||
| 68 | Component48, | ||
| 69 | Component49, | ||
| 70 | Component50, | ||
| 71 | Component51, | ||
| 72 | Component52, | ||
| 73 | Component53, | ||
| 74 | Component54, | ||
| 75 | Component55, | ||
| 76 | Component56, | ||
| 77 | Component57, | ||
| 78 | Component58, | ||
| 79 | Component59, | ||
| 80 | Component60, | ||
| 81 | Component61, | ||
| 82 | Component62, | ||
| 83 | Component63, | ||
| 84 | Component64, | ||
| 85 | Component65, | ||
| 86 | Component66, | ||
| 87 | Component67, | ||
| 88 | Component68, | ||
| 89 | Component69, | ||
| 90 | Component70, | ||
| 91 | Component71, | ||
| 92 | Component72, | ||
| 93 | Component73, | ||
| 94 | Component74, | ||
| 95 | Component75, | ||
| 96 | Component76, | ||
| 97 | Component77, | ||
| 98 | Component78, | ||
| 99 | Component79, | ||
| 100 | Component80, | ||
| 101 | Component81, | ||
| 102 | Component82, | ||
| 103 | Component83, | ||
| 104 | Component84, | ||
| 105 | Component85, | ||
| 106 | Component86, | ||
| 107 | Component87, | ||
| 108 | Component88, | ||
| 109 | Component89, | ||
| 110 | Component90, | ||
| 111 | Component91, | ||
| 112 | Component92, | ||
| 113 | Component93, | ||
| 114 | Component94, | ||
| 115 | Component95, | ||
| 116 | Component96, | ||
| 117 | Component97, | ||
| 118 | Component98, | ||
| 119 | Component99, | ||
| 120 | Component100, | ||
| 121 | Component101, | ||
| 122 | Component102, | ||
| 123 | Component103, | ||
| 124 | Component104, | ||
| 125 | Component105, | ||
| 126 | Component106, | ||
| 127 | Component107, | ||
| 128 | Component108, | ||
| 129 | Component109, | ||
| 130 | Component110, | ||
| 131 | Component111, | ||
| 132 | Component112, | ||
| 133 | Component113, | ||
| 134 | Component114, | ||
| 135 | Component115, | ||
| 136 | Component116, | ||
| 137 | Component117, | ||
| 138 | Component118, | ||
| 139 | Component119, | ||
| 140 | }; | ||
| 141 | static_assert(static_cast<u64>(Patch::Component119) == 127); | ||
| 142 | |||
| 143 | [[nodiscard]] bool IsGeneric(Patch patch) noexcept; | ||
| 144 | |||
| 145 | [[nodiscard]] u32 GenericPatchIndex(Patch patch); | ||
| 146 | |||
| 147 | [[nodiscard]] u32 GenericPatchElement(Patch patch); | ||
| 148 | |||
| 149 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp new file mode 100644 index 000000000..16bc44101 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include <boost/container/flat_set.hpp> | ||
| 8 | #include <boost/container/small_vector.hpp> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/post_order.h" | ||
| 12 | |||
| 13 | namespace Shader::IR { | ||
| 14 | |||
| 15 | BlockList PostOrder(const AbstractSyntaxNode& root) { | ||
| 16 | boost::container::small_vector<Block*, 16> block_stack; | ||
| 17 | boost::container::flat_set<Block*> visited; | ||
| 18 | BlockList post_order_blocks; | ||
| 19 | |||
| 20 | if (root.type != AbstractSyntaxNode::Type::Block) { | ||
| 21 | throw LogicError("First node of the abstract syntax list is not a block"); | ||
| 22 | } | ||
| 23 | Block* const first_block{root.data.block}; | ||
| 24 | visited.insert(first_block); | ||
| 25 | block_stack.push_back(first_block); | ||
| 26 | |||
| 27 | while (!block_stack.empty()) { | ||
| 28 | Block* const block{block_stack.back()}; | ||
| 29 | const auto visit{[&](Block* branch) { | ||
| 30 | if (!visited.insert(branch).second) { | ||
| 31 | return false; | ||
| 32 | } | ||
| 33 | // Calling push_back twice is faster than insert on MSVC | ||
| 34 | block_stack.push_back(block); | ||
| 35 | block_stack.push_back(branch); | ||
| 36 | return true; | ||
| 37 | }}; | ||
| 38 | block_stack.pop_back(); | ||
| 39 | if (std::ranges::none_of(block->ImmSuccessors(), visit)) { | ||
| 40 | post_order_blocks.push_back(block); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | return post_order_blocks; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::IR | ||
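
PostOrder walks the control-flow graph iteratively: the visit lambda short-circuits std::ranges::none_of on the first unvisited successor, re-pushing the current block beneath that successor so the block is revisited until every successor has been seen, and only then appended. It computes the same ordering as the obvious recursion, sketched below for reference; the explicit stack avoids overflowing the call stack on pathological shaders:

    void VisitPostOrder(Block* block, boost::container::flat_set<Block*>& visited,
                        BlockList& out) {
        for (Block* const succ : block->ImmSuccessors()) {
            if (visited.insert(succ).second) {
                VisitPostOrder(succ, visited, out);
            }
        }
        out.push_back(block); // emitted only after all of its successors
    }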
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h new file mode 100644 index 000000000..07bfbadc3 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/post_order.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | BlockList PostOrder(const AbstractSyntaxNode& root); | ||
| 13 | |||
| 14 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h new file mode 100644 index 000000000..4e7f32423 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/pred.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | namespace Shader::IR { | ||
| 10 | |||
| 11 | enum class Pred : u64 { | ||
| 12 | P0, | ||
| 13 | P1, | ||
| 14 | P2, | ||
| 15 | P3, | ||
| 16 | P4, | ||
| 17 | P5, | ||
| 18 | P6, | ||
| 19 | PT, | ||
| 20 | }; | ||
| 21 | |||
| 22 | constexpr size_t NUM_USER_PREDS = 7; | ||
| 23 | constexpr size_t NUM_PREDS = 8; | ||
| 24 | |||
| 25 | [[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept { | ||
| 26 | return static_cast<size_t>(pred); | ||
| 27 | } | ||
| 28 | |||
| 29 | } // namespace Shader::IR | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct fmt::formatter<Shader::IR::Pred> { | ||
| 33 | constexpr auto parse(format_parse_context& ctx) { | ||
| 34 | return ctx.begin(); | ||
| 35 | } | ||
| 36 | template <typename FormatContext> | ||
| 37 | auto format(const Shader::IR::Pred& pred, FormatContext& ctx) { | ||
| 38 | if (pred == Shader::IR::Pred::PT) { | ||
| 39 | return fmt::format_to(ctx.out(), "PT"); | ||
| 40 | } else { | ||
| 41 | return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred)); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | }; | ||
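
The formatter lets predicates appear directly in exception and log messages; a quick usage sketch:

    fmt::format("{}", Shader::IR::Pred::P3); // "P3"
    fmt::format("{}", Shader::IR::Pred::PT); // "PT", the always-true predicate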
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp new file mode 100644 index 000000000..3fc06f855 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.cpp | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | |||
| 16 | std::string DumpProgram(const Program& program) { | ||
| 17 | size_t index{0}; | ||
| 18 | std::map<const IR::Inst*, size_t> inst_to_index; | ||
| 19 | std::map<const IR::Block*, size_t> block_to_index; | ||
| 20 | |||
| 21 | for (const IR::Block* const block : program.blocks) { | ||
| 22 | block_to_index.emplace(block, index); | ||
| 23 | ++index; | ||
| 24 | } | ||
| 25 | std::string ret; | ||
| 26 | for (const auto& block : program.blocks) { | ||
| 27 | ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n'; | ||
| 28 | } | ||
| 29 | return ret; | ||
| 30 | } | ||
| 31 | |||
| 32 | } // namespace Shader::IR | ||
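
DumpProgram numbers every block up front so branch targets can be printed symbolically, then passes the shared counter and instruction map to DumpBlock so instruction indices stay unique across blocks. Typical debugging use, with a hypothetical call site:

    const IR::Program program{/* produced by the frontend translator */};
    fmt::print("{}", IR::DumpProgram(program));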
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h new file mode 100644 index 000000000..ebcaa8bc2 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/program.h | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <string> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/program_header.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "shader_recompiler/stage.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | |||
| 18 | struct Program { | ||
| 19 | AbstractSyntaxList syntax_list; | ||
| 20 | BlockList blocks; | ||
| 21 | BlockList post_order_blocks; | ||
| 22 | Info info; | ||
| 23 | Stage stage{}; | ||
| 24 | std::array<u32, 3> workgroup_size{}; | ||
| 25 | OutputTopology output_topology{}; | ||
| 26 | u32 output_vertices{}; | ||
| 27 | u32 invocations{}; | ||
| 28 | u32 local_memory_size{}; | ||
| 29 | u32 shared_memory_size{}; | ||
| 30 | bool is_geometry_passthrough{}; | ||
| 31 | }; | ||
| 32 | |||
| 33 | [[nodiscard]] std::string DumpProgram(const Program& program); | ||
| 34 | |||
| 35 | } // namespace Shader::IR | ||
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h new file mode 100644 index 000000000..a4b635792 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/reg.h | |||
| @@ -0,0 +1,332 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/exception.h" | ||
| 11 | |||
| 12 | namespace Shader::IR { | ||
| 13 | |||
| 14 | enum class Reg : u64 { | ||
| 15 | R0, | ||
| 16 | R1, | ||
| 17 | R2, | ||
| 18 | R3, | ||
| 19 | R4, | ||
| 20 | R5, | ||
| 21 | R6, | ||
| 22 | R7, | ||
| 23 | R8, | ||
| 24 | R9, | ||
| 25 | R10, | ||
| 26 | R11, | ||
| 27 | R12, | ||
| 28 | R13, | ||
| 29 | R14, | ||
| 30 | R15, | ||
| 31 | R16, | ||
| 32 | R17, | ||
| 33 | R18, | ||
| 34 | R19, | ||
| 35 | R20, | ||
| 36 | R21, | ||
| 37 | R22, | ||
| 38 | R23, | ||
| 39 | R24, | ||
| 40 | R25, | ||
| 41 | R26, | ||
| 42 | R27, | ||
| 43 | R28, | ||
| 44 | R29, | ||
| 45 | R30, | ||
| 46 | R31, | ||
| 47 | R32, | ||
| 48 | R33, | ||
| 49 | R34, | ||
| 50 | R35, | ||
| 51 | R36, | ||
| 52 | R37, | ||
| 53 | R38, | ||
| 54 | R39, | ||
| 55 | R40, | ||
| 56 | R41, | ||
| 57 | R42, | ||
| 58 | R43, | ||
| 59 | R44, | ||
| 60 | R45, | ||
| 61 | R46, | ||
| 62 | R47, | ||
| 63 | R48, | ||
| 64 | R49, | ||
| 65 | R50, | ||
| 66 | R51, | ||
| 67 | R52, | ||
| 68 | R53, | ||
| 69 | R54, | ||
| 70 | R55, | ||
| 71 | R56, | ||
| 72 | R57, | ||
| 73 | R58, | ||
| 74 | R59, | ||
| 75 | R60, | ||
| 76 | R61, | ||
| 77 | R62, | ||
| 78 | R63, | ||
| 79 | R64, | ||
| 80 | R65, | ||
| 81 | R66, | ||
| 82 | R67, | ||
| 83 | R68, | ||
| 84 | R69, | ||
| 85 | R70, | ||
| 86 | R71, | ||
| 87 | R72, | ||
| 88 | R73, | ||
| 89 | R74, | ||
| 90 | R75, | ||
| 91 | R76, | ||
| 92 | R77, | ||
| 93 | R78, | ||
| 94 | R79, | ||
| 95 | R80, | ||
| 96 | R81, | ||
| 97 | R82, | ||
| 98 | R83, | ||
| 99 | R84, | ||
| 100 | R85, | ||
| 101 | R86, | ||
| 102 | R87, | ||
| 103 | R88, | ||
| 104 | R89, | ||
| 105 | R90, | ||
| 106 | R91, | ||
| 107 | R92, | ||
| 108 | R93, | ||
| 109 | R94, | ||
| 110 | R95, | ||
| 111 | R96, | ||
| 112 | R97, | ||
| 113 | R98, | ||
| 114 | R99, | ||
| 115 | R100, | ||
| 116 | R101, | ||
| 117 | R102, | ||
| 118 | R103, | ||
| 119 | R104, | ||
| 120 | R105, | ||
| 121 | R106, | ||
| 122 | R107, | ||
| 123 | R108, | ||
| 124 | R109, | ||
| 125 | R110, | ||
| 126 | R111, | ||
| 127 | R112, | ||
| 128 | R113, | ||
| 129 | R114, | ||
| 130 | R115, | ||
| 131 | R116, | ||
| 132 | R117, | ||
| 133 | R118, | ||
| 134 | R119, | ||
| 135 | R120, | ||
| 136 | R121, | ||
| 137 | R122, | ||
| 138 | R123, | ||
| 139 | R124, | ||
| 140 | R125, | ||
| 141 | R126, | ||
| 142 | R127, | ||
| 143 | R128, | ||
| 144 | R129, | ||
| 145 | R130, | ||
| 146 | R131, | ||
| 147 | R132, | ||
| 148 | R133, | ||
| 149 | R134, | ||
| 150 | R135, | ||
| 151 | R136, | ||
| 152 | R137, | ||
| 153 | R138, | ||
| 154 | R139, | ||
| 155 | R140, | ||
| 156 | R141, | ||
| 157 | R142, | ||
| 158 | R143, | ||
| 159 | R144, | ||
| 160 | R145, | ||
| 161 | R146, | ||
| 162 | R147, | ||
| 163 | R148, | ||
| 164 | R149, | ||
| 165 | R150, | ||
| 166 | R151, | ||
| 167 | R152, | ||
| 168 | R153, | ||
| 169 | R154, | ||
| 170 | R155, | ||
| 171 | R156, | ||
| 172 | R157, | ||
| 173 | R158, | ||
| 174 | R159, | ||
| 175 | R160, | ||
| 176 | R161, | ||
| 177 | R162, | ||
| 178 | R163, | ||
| 179 | R164, | ||
| 180 | R165, | ||
| 181 | R166, | ||
| 182 | R167, | ||
| 183 | R168, | ||
| 184 | R169, | ||
| 185 | R170, | ||
| 186 | R171, | ||
| 187 | R172, | ||
| 188 | R173, | ||
| 189 | R174, | ||
| 190 | R175, | ||
| 191 | R176, | ||
| 192 | R177, | ||
| 193 | R178, | ||
| 194 | R179, | ||
| 195 | R180, | ||
| 196 | R181, | ||
| 197 | R182, | ||
| 198 | R183, | ||
| 199 | R184, | ||
| 200 | R185, | ||
| 201 | R186, | ||
| 202 | R187, | ||
| 203 | R188, | ||
| 204 | R189, | ||
| 205 | R190, | ||
| 206 | R191, | ||
| 207 | R192, | ||
| 208 | R193, | ||
| 209 | R194, | ||
| 210 | R195, | ||
| 211 | R196, | ||
| 212 | R197, | ||
| 213 | R198, | ||
| 214 | R199, | ||
| 215 | R200, | ||
| 216 | R201, | ||
| 217 | R202, | ||
| 218 | R203, | ||
| 219 | R204, | ||
| 220 | R205, | ||
| 221 | R206, | ||
| 222 | R207, | ||
| 223 | R208, | ||
| 224 | R209, | ||
| 225 | R210, | ||
| 226 | R211, | ||
| 227 | R212, | ||
| 228 | R213, | ||
| 229 | R214, | ||
| 230 | R215, | ||
| 231 | R216, | ||
| 232 | R217, | ||
| 233 | R218, | ||
| 234 | R219, | ||
| 235 | R220, | ||
| 236 | R221, | ||
| 237 | R222, | ||
| 238 | R223, | ||
| 239 | R224, | ||
| 240 | R225, | ||
| 241 | R226, | ||
| 242 | R227, | ||
| 243 | R228, | ||
| 244 | R229, | ||
| 245 | R230, | ||
| 246 | R231, | ||
| 247 | R232, | ||
| 248 | R233, | ||
| 249 | R234, | ||
| 250 | R235, | ||
| 251 | R236, | ||
| 252 | R237, | ||
| 253 | R238, | ||
| 254 | R239, | ||
| 255 | R240, | ||
| 256 | R241, | ||
| 257 | R242, | ||
| 258 | R243, | ||
| 259 | R244, | ||
| 260 | R245, | ||
| 261 | R246, | ||
| 262 | R247, | ||
| 263 | R248, | ||
| 264 | R249, | ||
| 265 | R250, | ||
| 266 | R251, | ||
| 267 | R252, | ||
| 268 | R253, | ||
| 269 | R254, | ||
| 270 | RZ, | ||
| 271 | }; | ||
| 272 | static_assert(static_cast<int>(Reg::RZ) == 255); | ||
| 273 | |||
| 274 | constexpr size_t NUM_USER_REGS = 255; | ||
| 275 | constexpr size_t NUM_REGS = 256; | ||
| 276 | |||
| 277 | [[nodiscard]] constexpr Reg operator+(Reg reg, int num) { | ||
| 278 | if (reg == Reg::RZ) { | ||
| 279 | // Adding to or subtracting from RZ yields RZ | ||
| 280 | return Reg::RZ; | ||
| 281 | } | ||
| 282 | const int result{static_cast<int>(reg) + num}; | ||
| 283 | if (result >= static_cast<int>(Reg::RZ)) { | ||
| 284 | throw LogicError("Overflow on register arithmetic"); | ||
| 285 | } | ||
| 286 | if (result < 0) { | ||
| 287 | throw LogicError("Underflow on register arithmetic"); | ||
| 288 | } | ||
| 289 | return static_cast<Reg>(result); | ||
| 290 | } | ||
| 291 | |||
| 292 | [[nodiscard]] constexpr Reg operator-(Reg reg, int num) { | ||
| 293 | return reg + (-num); | ||
| 294 | } | ||
| 295 | |||
| 296 | constexpr Reg operator++(Reg& reg) { | ||
| 297 | reg = reg + 1; | ||
| 298 | return reg; | ||
| 299 | } | ||
| 300 | |||
| 301 | constexpr Reg operator++(Reg& reg, int) { | ||
| 302 | const Reg copy{reg}; | ||
| 303 | reg = reg + 1; | ||
| 304 | return copy; | ||
| 305 | } | ||
| 306 | |||
| 307 | [[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept { | ||
| 308 | return static_cast<size_t>(reg); | ||
| 309 | } | ||
| 310 | |||
| 311 | [[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) { | ||
| 312 | return RegIndex(reg) % align == 0 || reg == Reg::RZ; | ||
| 313 | } | ||
| 314 | |||
| 315 | } // namespace Shader::IR | ||
| 316 | |||
| 317 | template <> | ||
| 318 | struct fmt::formatter<Shader::IR::Reg> { | ||
| 319 | constexpr auto parse(format_parse_context& ctx) { | ||
| 320 | return ctx.begin(); | ||
| 321 | } | ||
| 322 | template <typename FormatContext> | ||
| 323 | auto format(const Shader::IR::Reg& reg, FormatContext& ctx) { | ||
| 324 | if (reg == Shader::IR::Reg::RZ) { | ||
| 325 | return fmt::format_to(ctx.out(), "RZ"); | ||
| 326 | } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) { | ||
| 327 | return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg)); | ||
| 328 | } else { | ||
| 329 | throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg)); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | }; | ||
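The arithmetic helpers above give Reg the semantics of a bounded index with RZ as an absorbing sentinel; because they are constexpr, the key properties can be checked at compile time. A usage sketch relying only on reg.h:

    #include "shader_recompiler/frontend/ir/reg.h"

    using Shader::IR::IsAligned;
    using Shader::IR::Reg;
    using Shader::IR::RegIndex;

    static_assert(RegIndex(Reg::R4 + 2) == 6); // plain index arithmetic
    static_assert(Reg::RZ + 10 == Reg::RZ);    // RZ absorbs any offset
    static_assert(IsAligned(Reg::R8, 4));      // index divisible by alignment
    static_assert(IsAligned(Reg::RZ, 4));      // RZ counts as aligned for any requirement
    // Reg::R254 + 1 throws LogicError at runtime; since a throw is not a
    // constant expression, such an overflow can never pass a static_assert.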
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp new file mode 100644 index 000000000..f28341bfe --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <string> | ||
| 7 | |||
| 8 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | |||
| 12 | std::string NameOf(Type type) { | ||
| 13 | static constexpr std::array names{ | ||
| 14 | "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", | ||
| 15 | "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3", | ||
| 16 | "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4", | ||
| 17 | }; | ||
| 18 | const size_t bits{static_cast<size_t>(type)}; | ||
| 19 | if (bits == 0) { | ||
| 20 | return "Void"; | ||
| 21 | } | ||
| 22 | std::string result; | ||
| 23 | for (size_t i = 0; i < names.size(); i++) { | ||
| 24 | if ((bits & (size_t{1} << i)) != 0) { | ||
| 25 | if (!result.empty()) { | ||
| 26 | result += '|'; | ||
| 27 | } | ||
| 28 | result += names[i]; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | return result; | ||
| 32 | } | ||
| 33 | |||
| 34 | bool AreTypesCompatible(Type lhs, Type rhs) noexcept { | ||
| 35 | return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque; | ||
| 36 | } | ||
| 37 | |||
| 38 | } // namespace Shader::IR | ||
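NameOf treats Type as a bitmask: it walks the set bits in ascending order and joins their names with '|', so the compound masks used by the TypedValue aliases in value.h print naturally. A small usage sketch (the expected strings assume the enum layout in type.h below):

    #include <cassert>

    #include "shader_recompiler/frontend/ir/type.h"

    void NameOfExamples() {
        using Shader::IR::NameOf;
        using Shader::IR::Type;
        assert(NameOf(Type::Void) == "Void"); // zero mask is special-cased
        assert(NameOf(Type::F32) == "F32");
        assert(NameOf(Type::U32 | Type::U64) == "U32|U64"); // bit order, '|'-joined
    }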
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h new file mode 100644 index 000000000..294b230c4 --- /dev/null +++ b/src/shader_recompiler/frontend/ir/type.h | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "shader_recompiler/exception.h" | ||
| 13 | |||
| 14 | namespace Shader::IR { | ||
| 15 | |||
| 16 | enum class Type { | ||
| 17 | Void = 0, | ||
| 18 | Opaque = 1 << 0, | ||
| 19 | Reg = 1 << 1, | ||
| 20 | Pred = 1 << 2, | ||
| 21 | Attribute = 1 << 3, | ||
| 22 | Patch = 1 << 4, | ||
| 23 | U1 = 1 << 5, | ||
| 24 | U8 = 1 << 6, | ||
| 25 | U16 = 1 << 7, | ||
| 26 | U32 = 1 << 8, | ||
| 27 | U64 = 1 << 9, | ||
| 28 | F16 = 1 << 10, | ||
| 29 | F32 = 1 << 11, | ||
| 30 | F64 = 1 << 12, | ||
| 31 | U32x2 = 1 << 13, | ||
| 32 | U32x3 = 1 << 14, | ||
| 33 | U32x4 = 1 << 15, | ||
| 34 | F16x2 = 1 << 16, | ||
| 35 | F16x3 = 1 << 17, | ||
| 36 | F16x4 = 1 << 18, | ||
| 37 | F32x2 = 1 << 19, | ||
| 38 | F32x3 = 1 << 20, | ||
| 39 | F32x4 = 1 << 21, | ||
| 40 | F64x2 = 1 << 22, | ||
| 41 | F64x3 = 1 << 23, | ||
| 42 | F64x4 = 1 << 24, | ||
| 43 | }; | ||
| 44 | DECLARE_ENUM_FLAG_OPERATORS(Type) | ||
| 45 | |||
| 46 | [[nodiscard]] std::string NameOf(Type type); | ||
| 47 | |||
| 48 | [[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept; | ||
| 49 | |||
| 50 | } // namespace Shader::IR | ||
| 51 | |||
| 52 | template <> | ||
| 53 | struct fmt::formatter<Shader::IR::Type> { | ||
| 54 | constexpr auto parse(format_parse_context& ctx) { | ||
| 55 | return ctx.begin(); | ||
| 56 | } | ||
| 57 | template <typename FormatContext> | ||
| 58 | auto format(const Shader::IR::Type& type, FormatContext& ctx) { | ||
| 59 | return fmt::format_to(ctx.out(), "{}", NameOf(type)); | ||
| 60 | } | ||
| 61 | }; | ||
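DECLARE_ENUM_FLAG_OPERATORS comes from common/common_funcs.h, which is outside this diff; the usage in this header only needs bitwise operators defined through the underlying type. A macro of roughly this shape would suffice (the real definition in yuzu's common library also covers ^, ~ and the compound-assignment forms):

    #include <type_traits>

    #define DECLARE_ENUM_FLAG_OPERATORS(type)                                \
        [[nodiscard]] constexpr type operator|(type a, type b) noexcept {    \
            using T = std::underlying_type_t<type>;                          \
            return static_cast<type>(static_cast<T>(a) | static_cast<T>(b)); \
        }                                                                    \
        [[nodiscard]] constexpr type operator&(type a, type b) noexcept {    \
            using T = std::underlying_type_t<type>;                          \
            return static_cast<type>(static_cast<T>(a) & static_cast<T>(b)); \
        }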
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp new file mode 100644 index 000000000..d365ea1bc --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.cpp | |||
| @@ -0,0 +1,99 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | |||
| 8 | namespace Shader::IR { | ||
| 9 | |||
| 10 | Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {} | ||
| 11 | |||
| 12 | Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {} | ||
| 13 | |||
| 14 | Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {} | ||
| 15 | |||
| 16 | Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {} | ||
| 17 | |||
| 18 | Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {} | ||
| 19 | |||
| 20 | Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {} | ||
| 21 | |||
| 22 | Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {} | ||
| 23 | |||
| 24 | Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {} | ||
| 25 | |||
| 26 | Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} | ||
| 27 | |||
| 28 | Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {} | ||
| 29 | |||
| 30 | Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} | ||
| 31 | |||
| 32 | Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {} | ||
| 33 | |||
| 34 | IR::Type Value::Type() const noexcept { | ||
| 35 | if (IsPhi()) { | ||
| 36 | // The type of a phi node is stored in its flags | ||
| 37 | return inst->Flags<IR::Type>(); | ||
| 38 | } | ||
| 39 | if (IsIdentity()) { | ||
| 40 | return inst->Arg(0).Type(); | ||
| 41 | } | ||
| 42 | if (type == Type::Opaque) { | ||
| 43 | return inst->Type(); | ||
| 44 | } | ||
| 45 | return type; | ||
| 46 | } | ||
| 47 | |||
| 48 | bool Value::operator==(const Value& other) const { | ||
| 49 | if (type != other.type) { | ||
| 50 | return false; | ||
| 51 | } | ||
| 52 | switch (type) { | ||
| 53 | case Type::Void: | ||
| 54 | return true; | ||
| 55 | case Type::Opaque: | ||
| 56 | return inst == other.inst; | ||
| 57 | case Type::Reg: | ||
| 58 | return reg == other.reg; | ||
| 59 | case Type::Pred: | ||
| 60 | return pred == other.pred; | ||
| 61 | case Type::Attribute: | ||
| 62 | return attribute == other.attribute; | ||
| 63 | case Type::Patch: | ||
| 64 | return patch == other.patch; | ||
| 65 | case Type::U1: | ||
| 66 | return imm_u1 == other.imm_u1; | ||
| 67 | case Type::U8: | ||
| 68 | return imm_u8 == other.imm_u8; | ||
| 69 | case Type::U16: | ||
| 70 | case Type::F16: | ||
| 71 | return imm_u16 == other.imm_u16; | ||
| 72 | case Type::U32: | ||
| 73 | case Type::F32: | ||
| 74 | return imm_u32 == other.imm_u32; | ||
| 75 | case Type::U64: | ||
| 76 | case Type::F64: | ||
| 77 | return imm_u64 == other.imm_u64; | ||
| 78 | case Type::U32x2: | ||
| 79 | case Type::U32x3: | ||
| 80 | case Type::U32x4: | ||
| 81 | case Type::F16x2: | ||
| 82 | case Type::F16x3: | ||
| 83 | case Type::F16x4: | ||
| 84 | case Type::F32x2: | ||
| 85 | case Type::F32x3: | ||
| 86 | case Type::F32x4: | ||
| 87 | case Type::F64x2: | ||
| 88 | case Type::F64x3: | ||
| 89 | case Type::F64x4: | ||
| 90 | break; | ||
| 91 | } | ||
| 92 | throw LogicError("Invalid type {}", type); | ||
| 93 | } | ||
| 94 | |||
| 95 | bool Value::operator!=(const Value& other) const { | ||
| 96 | return !operator==(other); | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace Shader::IR | ||
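Note that operator== compares F16/F32/F64 immediates through their unsigned union aliases, i.e. bit for bit. For value identity in a compiler this is presumably the intended behavior: two NaNs with the same payload compare equal and +0.0/-0.0 stay distinct, unlike under IEEE ==. A standalone sketch of the difference:

    #include <bit>
    #include <cassert>
    #include <cmath>
    #include <cstdint>

    void BitwiseVersusIeee() {
        const float nan{std::nanf("")};
        assert(nan != nan); // IEEE: NaN is unordered, even against itself
        assert(std::bit_cast<std::uint32_t>(nan) == std::bit_cast<std::uint32_t>(nan));
        assert(0.0f == -0.0f); // IEEE: signed zeros compare equal
        assert(std::bit_cast<std::uint32_t>(0.0f) != std::bit_cast<std::uint32_t>(-0.0f));
    }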
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h new file mode 100644 index 000000000..0c6bf684d --- /dev/null +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -0,0 +1,398 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstring> | ||
| 9 | #include <memory> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/intrusive/list.hpp> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/bit_cast.h" | ||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "shader_recompiler/exception.h" | ||
| 21 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 22 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 23 | #include "shader_recompiler/frontend/ir/patch.h" | ||
| 24 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 25 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 26 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 27 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 28 | |||
| 29 | namespace Shader::IR { | ||
| 30 | |||
| 31 | class Block; | ||
| 32 | class Inst; | ||
| 33 | |||
| 34 | struct AssociatedInsts; | ||
| 35 | |||
| 36 | class Value { | ||
| 37 | public: | ||
| 38 | Value() noexcept = default; | ||
| 39 | explicit Value(IR::Inst* value) noexcept; | ||
| 40 | explicit Value(IR::Reg value) noexcept; | ||
| 41 | explicit Value(IR::Pred value) noexcept; | ||
| 42 | explicit Value(IR::Attribute value) noexcept; | ||
| 43 | explicit Value(IR::Patch value) noexcept; | ||
| 44 | explicit Value(bool value) noexcept; | ||
| 45 | explicit Value(u8 value) noexcept; | ||
| 46 | explicit Value(u16 value) noexcept; | ||
| 47 | explicit Value(u32 value) noexcept; | ||
| 48 | explicit Value(f32 value) noexcept; | ||
| 49 | explicit Value(u64 value) noexcept; | ||
| 50 | explicit Value(f64 value) noexcept; | ||
| 51 | |||
| 52 | [[nodiscard]] bool IsIdentity() const noexcept; | ||
| 53 | [[nodiscard]] bool IsPhi() const noexcept; | ||
| 54 | [[nodiscard]] bool IsEmpty() const noexcept; | ||
| 55 | [[nodiscard]] bool IsImmediate() const noexcept; | ||
| 56 | [[nodiscard]] IR::Type Type() const noexcept; | ||
| 57 | |||
| 58 | [[nodiscard]] IR::Inst* Inst() const; | ||
| 59 | [[nodiscard]] IR::Inst* InstRecursive() const; | ||
| 60 | [[nodiscard]] IR::Value Resolve() const; | ||
| 61 | [[nodiscard]] IR::Reg Reg() const; | ||
| 62 | [[nodiscard]] IR::Pred Pred() const; | ||
| 63 | [[nodiscard]] IR::Attribute Attribute() const; | ||
| 64 | [[nodiscard]] IR::Patch Patch() const; | ||
| 65 | [[nodiscard]] bool U1() const; | ||
| 66 | [[nodiscard]] u8 U8() const; | ||
| 67 | [[nodiscard]] u16 U16() const; | ||
| 68 | [[nodiscard]] u32 U32() const; | ||
| 69 | [[nodiscard]] f32 F32() const; | ||
| 70 | [[nodiscard]] u64 U64() const; | ||
| 71 | [[nodiscard]] f64 F64() const; | ||
| 72 | |||
| 73 | [[nodiscard]] bool operator==(const Value& other) const; | ||
| 74 | [[nodiscard]] bool operator!=(const Value& other) const; | ||
| 75 | |||
| 76 | private: | ||
| 77 | IR::Type type{}; | ||
| 78 | union { | ||
| 79 | IR::Inst* inst{}; | ||
| 80 | IR::Reg reg; | ||
| 81 | IR::Pred pred; | ||
| 82 | IR::Attribute attribute; | ||
| 83 | IR::Patch patch; | ||
| 84 | bool imm_u1; | ||
| 85 | u8 imm_u8; | ||
| 86 | u16 imm_u16; | ||
| 87 | u32 imm_u32; | ||
| 88 | f32 imm_f32; | ||
| 89 | u64 imm_u64; | ||
| 90 | f64 imm_f64; | ||
| 91 | }; | ||
| 92 | }; | ||
| 93 | static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero"); | ||
| 94 | static_assert(std::is_trivially_copyable_v<Value>); | ||
| 95 | |||
| 96 | template <IR::Type type_> | ||
| 97 | class TypedValue : public Value { | ||
| 98 | public: | ||
| 99 | TypedValue() = default; | ||
| 100 | |||
| 101 | template <IR::Type other_type> | ||
| 102 | requires((other_type & type_) != IR::Type::Void) explicit(false) | ||
| 103 | TypedValue(const TypedValue<other_type>& value) | ||
| 104 | : Value(value) {} | ||
| 105 | |||
| 106 | explicit TypedValue(const Value& value) : Value(value) { | ||
| 107 | if ((value.Type() & type_) == IR::Type::Void) { | ||
| 108 | throw InvalidArgument("Incompatible types {} and {}", type_, value.Type()); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {} | ||
| 113 | }; | ||
| 114 | |||
| 115 | class Inst : public boost::intrusive::list_base_hook<> { | ||
| 116 | public: | ||
| 117 | explicit Inst(IR::Opcode op_, u32 flags_) noexcept; | ||
| 118 | ~Inst(); | ||
| 119 | |||
| 120 | Inst& operator=(const Inst&) = delete; | ||
| 121 | Inst(const Inst&) = delete; | ||
| 122 | |||
| 123 | Inst& operator=(Inst&&) = delete; | ||
| 124 | Inst(Inst&&) = delete; | ||
| 125 | |||
| 126 | /// Get the number of uses this instruction has. | ||
| 127 | [[nodiscard]] int UseCount() const noexcept { | ||
| 128 | return use_count; | ||
| 129 | } | ||
| 130 | |||
| 131 | /// Determines whether this instruction has uses or not. | ||
| 132 | [[nodiscard]] bool HasUses() const noexcept { | ||
| 133 | return use_count > 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Get the opcode this microinstruction represents. | ||
| 137 | [[nodiscard]] IR::Opcode GetOpcode() const noexcept { | ||
| 138 | return op; | ||
| 139 | } | ||
| 140 | |||
| 141 | /// Determines if there is a pseudo-operation associated with this instruction. | ||
| 142 | [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept { | ||
| 143 | return associated_insts != nullptr; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Determines whether or not this instruction may have side effects. | ||
| 147 | [[nodiscard]] bool MayHaveSideEffects() const noexcept; | ||
| 148 | |||
| 149 | /// Determines whether or not this instruction is a pseudo-instruction. | ||
| 150 | /// Pseudo-instructions depend on their parent instructions for their semantics. | ||
| 151 | [[nodiscard]] bool IsPseudoInstruction() const noexcept; | ||
| 152 | |||
| 153 | /// Determines if all arguments of this instruction are immediates. | ||
| 154 | [[nodiscard]] bool AreAllArgsImmediates() const; | ||
| 155 | |||
| 156 | /// Gets a pseudo-operation associated with this instruction | ||
| 157 | [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode); | ||
| 158 | |||
| 159 | /// Get the type this instruction returns. | ||
| 160 | [[nodiscard]] IR::Type Type() const; | ||
| 161 | |||
| 162 | /// Get the number of arguments this instruction has. | ||
| 163 | [[nodiscard]] size_t NumArgs() const { | ||
| 164 | return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op); | ||
| 165 | } | ||
| 166 | |||
| 167 | /// Get the value of a given argument index. | ||
| 168 | [[nodiscard]] Value Arg(size_t index) const noexcept { | ||
| 169 | if (op == IR::Opcode::Phi) { | ||
| 170 | return phi_args[index].second; | ||
| 171 | } else { | ||
| 172 | return args[index]; | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | /// Set the value of a given argument index. | ||
| 177 | void SetArg(size_t index, Value value); | ||
| 178 | |||
| 179 | /// Get a pointer to the block of a phi argument. | ||
| 180 | [[nodiscard]] Block* PhiBlock(size_t index) const; | ||
| 181 | /// Add phi operand to a phi instruction. | ||
| 182 | void AddPhiOperand(Block* predecessor, const Value& value); | ||
| 183 | |||
| 184 | void Invalidate(); | ||
| 185 | void ClearArgs(); | ||
| 186 | |||
| 187 | void ReplaceUsesWith(Value replacement); | ||
| 188 | |||
| 189 | void ReplaceOpcode(IR::Opcode opcode); | ||
| 190 | |||
| 191 | template <typename FlagsType> | ||
| 192 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||
| 193 | [[nodiscard]] FlagsType Flags() const noexcept { | ||
| 194 | FlagsType ret; | ||
| 195 | std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret)); | ||
| 196 | return ret; | ||
| 197 | } | ||
| 198 | |||
| 199 | template <typename FlagsType> | ||
| 200 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||
| 201 | void SetFlags(FlagsType value) noexcept { | ||
| 202 | std::memcpy(&flags, &value, sizeof(value)); | ||
| 203 | } | ||
| 204 | |||
| 205 | /// Intrusively store the host definition of this instruction. | ||
| 206 | template <typename DefinitionType> | ||
| 207 | void SetDefinition(DefinitionType def) { | ||
| 208 | definition = Common::BitCast<u32>(def); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Return the intrusively stored host definition of this instruction. | ||
| 212 | template <typename DefinitionType> | ||
| 213 | [[nodiscard]] DefinitionType Definition() const noexcept { | ||
| 214 | return Common::BitCast<DefinitionType>(definition); | ||
| 215 | } | ||
| 216 | |||
| 217 | /// Destructively remove one reference count from the instruction | ||
| 218 | /// Useful for register allocation | ||
| 219 | void DestructiveRemoveUsage() { | ||
| 220 | --use_count; | ||
| 221 | } | ||
| 222 | |||
| 223 | /// Destructively add usages to the instruction | ||
| 224 | /// Useful for register allocation | ||
| 225 | void DestructiveAddUsage(int count) { | ||
| 226 | use_count += count; | ||
| 227 | } | ||
| 228 | |||
| 229 | private: | ||
| 230 | struct NonTriviallyDummy { | ||
| 231 | NonTriviallyDummy() noexcept {} | ||
| 232 | }; | ||
| 233 | |||
| 234 | void Use(const Value& value); | ||
| 235 | void UndoUse(const Value& value); | ||
| 236 | |||
| 237 | IR::Opcode op{}; | ||
| 238 | int use_count{}; | ||
| 239 | u32 flags{}; | ||
| 240 | u32 definition{}; | ||
| 241 | union { | ||
| 242 | NonTriviallyDummy dummy{}; | ||
| 243 | boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args; | ||
| 244 | std::array<Value, 5> args; | ||
| 245 | }; | ||
| 246 | std::unique_ptr<AssociatedInsts> associated_insts; | ||
| 247 | }; | ||
| 248 | static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | ||
| 249 | |||
| 250 | struct AssociatedInsts { | ||
| 251 | union { | ||
| 252 | Inst* in_bounds_inst; | ||
| 253 | Inst* sparse_inst; | ||
| 254 | Inst* zero_inst{}; | ||
| 255 | }; | ||
| 256 | Inst* sign_inst{}; | ||
| 257 | Inst* carry_inst{}; | ||
| 258 | Inst* overflow_inst{}; | ||
| 259 | }; | ||
| 260 | |||
| 261 | using U1 = TypedValue<Type::U1>; | ||
| 262 | using U8 = TypedValue<Type::U8>; | ||
| 263 | using U16 = TypedValue<Type::U16>; | ||
| 264 | using U32 = TypedValue<Type::U32>; | ||
| 265 | using U64 = TypedValue<Type::U64>; | ||
| 266 | using F16 = TypedValue<Type::F16>; | ||
| 267 | using F32 = TypedValue<Type::F32>; | ||
| 268 | using F64 = TypedValue<Type::F64>; | ||
| 269 | using U32U64 = TypedValue<Type::U32 | Type::U64>; | ||
| 270 | using F32F64 = TypedValue<Type::F32 | Type::F64>; | ||
| 271 | using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; | ||
| 272 | using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>; | ||
| 273 | using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; | ||
| 274 | |||
| 275 | inline bool Value::IsIdentity() const noexcept { | ||
| 276 | return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity; | ||
| 277 | } | ||
| 278 | |||
| 279 | inline bool Value::IsPhi() const noexcept { | ||
| 280 | return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi; | ||
| 281 | } | ||
| 282 | |||
| 283 | inline bool Value::IsEmpty() const noexcept { | ||
| 284 | return type == Type::Void; | ||
| 285 | } | ||
| 286 | |||
| 287 | inline bool Value::IsImmediate() const noexcept { | ||
| 288 | IR::Type current_type{type}; | ||
| 289 | const IR::Inst* current_inst{inst}; | ||
| 290 | while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) { | ||
| 291 | const Value& arg{current_inst->Arg(0)}; | ||
| 292 | current_type = arg.type; | ||
| 293 | current_inst = arg.inst; | ||
| 294 | } | ||
| 295 | return current_type != Type::Opaque; | ||
| 296 | } | ||
| 297 | |||
| 298 | inline IR::Inst* Value::Inst() const { | ||
| 299 | DEBUG_ASSERT(type == Type::Opaque); | ||
| 300 | return inst; | ||
| 301 | } | ||
| 302 | |||
| 303 | inline IR::Inst* Value::InstRecursive() const { | ||
| 304 | DEBUG_ASSERT(type == Type::Opaque); | ||
| 305 | if (IsIdentity()) { | ||
| 306 | return inst->Arg(0).InstRecursive(); | ||
| 307 | } | ||
| 308 | return inst; | ||
| 309 | } | ||
| 310 | |||
| 311 | inline IR::Value Value::Resolve() const { | ||
| 312 | if (IsIdentity()) { | ||
| 313 | return inst->Arg(0).Resolve(); | ||
| 314 | } | ||
| 315 | return *this; | ||
| 316 | } | ||
| 317 | |||
| 318 | inline IR::Reg Value::Reg() const { | ||
| 319 | DEBUG_ASSERT(type == Type::Reg); | ||
| 320 | return reg; | ||
| 321 | } | ||
| 322 | |||
| 323 | inline IR::Pred Value::Pred() const { | ||
| 324 | DEBUG_ASSERT(type == Type::Pred); | ||
| 325 | return pred; | ||
| 326 | } | ||
| 327 | |||
| 328 | inline IR::Attribute Value::Attribute() const { | ||
| 329 | DEBUG_ASSERT(type == Type::Attribute); | ||
| 330 | return attribute; | ||
| 331 | } | ||
| 332 | |||
| 333 | inline IR::Patch Value::Patch() const { | ||
| 334 | DEBUG_ASSERT(type == Type::Patch); | ||
| 335 | return patch; | ||
| 336 | } | ||
| 337 | |||
| 338 | inline bool Value::U1() const { | ||
| 339 | if (IsIdentity()) { | ||
| 340 | return inst->Arg(0).U1(); | ||
| 341 | } | ||
| 342 | DEBUG_ASSERT(type == Type::U1); | ||
| 343 | return imm_u1; | ||
| 344 | } | ||
| 345 | |||
| 346 | inline u8 Value::U8() const { | ||
| 347 | if (IsIdentity()) { | ||
| 348 | return inst->Arg(0).U8(); | ||
| 349 | } | ||
| 350 | DEBUG_ASSERT(type == Type::U8); | ||
| 351 | return imm_u8; | ||
| 352 | } | ||
| 353 | |||
| 354 | inline u16 Value::U16() const { | ||
| 355 | if (IsIdentity()) { | ||
| 356 | return inst->Arg(0).U16(); | ||
| 357 | } | ||
| 358 | DEBUG_ASSERT(type == Type::U16); | ||
| 359 | return imm_u16; | ||
| 360 | } | ||
| 361 | |||
| 362 | inline u32 Value::U32() const { | ||
| 363 | if (IsIdentity()) { | ||
| 364 | return inst->Arg(0).U32(); | ||
| 365 | } | ||
| 366 | DEBUG_ASSERT(type == Type::U32); | ||
| 367 | return imm_u32; | ||
| 368 | } | ||
| 369 | |||
| 370 | inline f32 Value::F32() const { | ||
| 371 | if (IsIdentity()) { | ||
| 372 | return inst->Arg(0).F32(); | ||
| 373 | } | ||
| 374 | DEBUG_ASSERT(type == Type::F32); | ||
| 375 | return imm_f32; | ||
| 376 | } | ||
| 377 | |||
| 378 | inline u64 Value::U64() const { | ||
| 379 | if (IsIdentity()) { | ||
| 380 | return inst->Arg(0).U64(); | ||
| 381 | } | ||
| 382 | DEBUG_ASSERT(type == Type::U64); | ||
| 383 | return imm_u64; | ||
| 384 | } | ||
| 385 | |||
| 386 | inline f64 Value::F64() const { | ||
| 387 | if (IsIdentity()) { | ||
| 388 | return inst->Arg(0).F64(); | ||
| 389 | } | ||
| 390 | DEBUG_ASSERT(type == Type::F64); | ||
| 391 | return imm_f64; | ||
| 392 | } | ||
| 393 | |||
| 394 | [[nodiscard]] inline bool IsPhi(const Inst& inst) { | ||
| 395 | return inst.GetOpcode() == Opcode::Phi; | ||
| 396 | } | ||
| 397 | |||
| 398 | } // namespace Shader::IR | ||
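Flags() and SetFlags() round-trip any trivially copyable type of at most four bytes through the private u32 flags field with std::memcpy, which is the well-defined way to type-pun in C++ (unlike a reinterpret_cast). A standalone sketch of the idiom with an illustrative flags struct:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <type_traits>

    struct FpControl { // illustrative, not the real shader_recompiler type
        bool no_contraction{};
        std::uint8_t rounding{};
    };
    static_assert(sizeof(FpControl) <= sizeof(std::uint32_t));
    static_assert(std::is_trivially_copyable_v<FpControl>);

    int main() {
        std::uint32_t storage{};
        const FpControl in{.no_contraction = true, .rounding = 2};
        std::memcpy(&storage, &in, sizeof(in));   // what SetFlags() does
        FpControl out;
        std::memcpy(&out, &storage, sizeof(out)); // what Flags() does
        assert(out.no_contraction && out.rounding == 2);
    }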
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp new file mode 100644 index 000000000..1a954a509 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp | |||
| @@ -0,0 +1,642 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <optional> | ||
| 8 | #include <string> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include <fmt/format.h> | ||
| 12 | |||
| 13 | #include "shader_recompiler/exception.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 16 | #include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" | ||
| 17 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 18 | |||
| 19 | namespace Shader::Maxwell::Flow { | ||
| 20 | namespace { | ||
| 21 | struct Compare { | ||
| 22 | bool operator()(const Block& lhs, Location rhs) const noexcept { | ||
| 23 | return lhs.begin < rhs; | ||
| 24 | } | ||
| 25 | |||
| 26 | bool operator()(Location lhs, const Block& rhs) const noexcept { | ||
| 27 | return lhs < rhs.begin; | ||
| 28 | } | ||
| 29 | |||
| 30 | bool operator()(const Block& lhs, const Block& rhs) const noexcept { | ||
| 31 | return lhs.begin < rhs.begin; | ||
| 32 | } | ||
| 33 | }; | ||
| 34 | |||
| 35 | u32 BranchOffset(Location pc, Instruction inst) { | ||
| 36 | return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u; | ||
| 37 | } | ||
| 38 | |||
| 39 | void Split(Block* old_block, Block* new_block, Location pc) { | ||
| 40 | if (pc <= old_block->begin || pc >= old_block->end) { | ||
| 41 | throw InvalidArgument("Invalid address to split={}", pc); | ||
| 42 | } | ||
| 43 | *new_block = Block{}; | ||
| 44 | new_block->begin = pc; | ||
| 45 | new_block->end = old_block->end; | ||
| 46 | new_block->end_class = old_block->end_class; | ||
| 47 | new_block->cond = old_block->cond; | ||
| 48 | new_block->stack = old_block->stack; | ||
| 49 | new_block->branch_true = old_block->branch_true; | ||
| 50 | new_block->branch_false = old_block->branch_false; | ||
| 51 | new_block->function_call = old_block->function_call; | ||
| 52 | new_block->return_block = old_block->return_block; | ||
| 53 | new_block->branch_reg = old_block->branch_reg; | ||
| 54 | new_block->branch_offset = old_block->branch_offset; | ||
| 55 | new_block->indirect_branches = std::move(old_block->indirect_branches); | ||
| 56 | |||
| 57 | const Location old_begin{old_block->begin}; | ||
| 58 | Stack old_stack{std::move(old_block->stack)}; | ||
| 59 | *old_block = Block{}; | ||
| 60 | old_block->begin = old_begin; | ||
| 61 | old_block->end = pc; | ||
| 62 | old_block->end_class = EndClass::Branch; | ||
| 63 | old_block->cond = IR::Condition(true); | ||
| 64 | old_block->stack = old_stack; | ||
| 65 | old_block->branch_true = new_block; | ||
| 66 | old_block->branch_false = nullptr; | ||
| 67 | } | ||
| 68 | |||
| 69 | Token OpcodeToken(Opcode opcode) { | ||
| 70 | switch (opcode) { | ||
| 71 | case Opcode::PBK: | ||
| 72 | case Opcode::BRK: | ||
| 73 | return Token::PBK; | ||
| 74 | case Opcode::PCNT: | ||
| 75 | case Opcode::CONT: | ||
| 76 | return Token::PCNT; | ||
| 77 | case Opcode::PEXIT: | ||
| 78 | case Opcode::EXIT: | ||
| 79 | return Token::PEXIT; | ||
| 80 | case Opcode::PLONGJMP: | ||
| 81 | case Opcode::LONGJMP: | ||
| 82 | return Token::PLONGJMP; | ||
| 83 | case Opcode::PRET: | ||
| 84 | case Opcode::RET: | ||
| 85 | case Opcode::CAL: | ||
| 86 | return Token::PRET; | ||
| 87 | case Opcode::SSY: | ||
| 88 | case Opcode::SYNC: | ||
| 89 | return Token::SSY; | ||
| 90 | default: | ||
| 91 | throw InvalidArgument("{}", opcode); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | bool IsAbsoluteJump(Opcode opcode) { | ||
| 96 | switch (opcode) { | ||
| 97 | case Opcode::JCAL: | ||
| 98 | case Opcode::JMP: | ||
| 99 | case Opcode::JMX: | ||
| 100 | return true; | ||
| 101 | default: | ||
| 102 | return false; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | bool HasFlowTest(Opcode opcode) { | ||
| 107 | switch (opcode) { | ||
| 108 | case Opcode::BRA: | ||
| 109 | case Opcode::BRX: | ||
| 110 | case Opcode::EXIT: | ||
| 111 | case Opcode::JMP: | ||
| 112 | case Opcode::JMX: | ||
| 113 | case Opcode::KIL: | ||
| 114 | case Opcode::BRK: | ||
| 115 | case Opcode::CONT: | ||
| 116 | case Opcode::LONGJMP: | ||
| 117 | case Opcode::RET: | ||
| 118 | case Opcode::SYNC: | ||
| 119 | return true; | ||
| 120 | case Opcode::CAL: | ||
| 121 | case Opcode::JCAL: | ||
| 122 | return false; | ||
| 123 | default: | ||
| 124 | throw InvalidArgument("Invalid branch {}", opcode); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | std::string NameOf(const Block& block) { | ||
| 129 | if (block.begin.IsVirtual()) { | ||
| 130 | return fmt::format("\"Virtual {}\"", block.begin); | ||
| 131 | } else { | ||
| 132 | return fmt::format("\"{}\"", block.begin); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } // Anonymous namespace | ||
| 136 | |||
| 137 | void Stack::Push(Token token, Location target) { | ||
| 138 | entries.push_back({ | ||
| 139 | .token = token, | ||
| 140 | .target{target}, | ||
| 141 | }); | ||
| 142 | } | ||
| 143 | |||
| 144 | std::pair<Location, Stack> Stack::Pop(Token token) const { | ||
| 145 | const std::optional<Location> pc{Peek(token)}; | ||
| 146 | if (!pc) { | ||
| 147 | throw LogicError("Token could not be found"); | ||
| 148 | } | ||
| 149 | return {*pc, Remove(token)}; | ||
| 150 | } | ||
| 151 | |||
| 152 | std::optional<Location> Stack::Peek(Token token) const { | ||
| 153 | const auto it{std::find_if(entries.rbegin(), entries.rend(), | ||
| 154 | [token](const auto& entry) { return entry.token == token; })}; | ||
| 155 | if (it == entries.rend()) { | ||
| 156 | return std::nullopt; | ||
| 157 | } | ||
| 158 | return it->target; | ||
| 159 | } | ||
| 160 | |||
| 161 | Stack Stack::Remove(Token token) const { | ||
| 162 | const auto it{std::find_if(entries.rbegin(), entries.rend(), | ||
| 163 | [token](const auto& entry) { return entry.token == token; })}; | ||
| 164 | const auto pos{std::distance(entries.rbegin(), it)}; | ||
| 165 | Stack result; | ||
| 166 | result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1); | ||
| 167 | return result; | ||
| 168 | } | ||
| 169 | |||
| 170 | bool Block::Contains(Location pc) const noexcept { | ||
| 171 | return pc >= begin && pc < end; | ||
| 172 | } | ||
| 173 | |||
| 174 | Function::Function(ObjectPool<Block>& block_pool, Location start_address) | ||
| 175 | : entrypoint{start_address} { | ||
| 176 | Label& label{labels.emplace_back()}; | ||
| 177 | label.address = start_address; | ||
| 178 | label.block = block_pool.Create(Block{}); | ||
| 179 | label.block->begin = start_address; | ||
| 180 | label.block->end = start_address; | ||
| 181 | label.block->end_class = EndClass::Branch; | ||
| 182 | label.block->cond = IR::Condition(true); | ||
| 183 | label.block->branch_true = nullptr; | ||
| 184 | label.block->branch_false = nullptr; | ||
| 185 | } | ||
| 186 | |||
| 187 | CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address, | ||
| 188 | bool exits_to_dispatcher_) | ||
| 189 | : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{ | ||
| 190 | exits_to_dispatcher_} { | ||
| 191 | if (exits_to_dispatcher) { | ||
| 192 | dispatch_block = block_pool.Create(Block{}); | ||
| 193 | dispatch_block->begin = {}; | ||
| 194 | dispatch_block->end = {}; | ||
| 195 | dispatch_block->end_class = EndClass::Exit; | ||
| 196 | dispatch_block->cond = IR::Condition(true); | ||
| 197 | dispatch_block->stack = {}; | ||
| 198 | dispatch_block->branch_true = nullptr; | ||
| 199 | dispatch_block->branch_false = nullptr; | ||
| 200 | } | ||
| 201 | functions.emplace_back(block_pool, start_address); | ||
| 202 | for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) { | ||
| 203 | while (!functions[function_id].labels.empty()) { | ||
| 204 | Function& function{functions[function_id]}; | ||
| 205 | Label label{function.labels.back()}; | ||
| 206 | function.labels.pop_back(); | ||
| 207 | AnalyzeLabel(function_id, label); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | if (exits_to_dispatcher) { | ||
| 211 | const auto last_block{functions[0].blocks.rbegin()}; | ||
| 212 | dispatch_block->begin = last_block->end + 1; | ||
| 213 | dispatch_block->end = last_block->end + 1; | ||
| 214 | functions[0].blocks.insert(*dispatch_block); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | void CFG::AnalyzeLabel(FunctionId function_id, Label& label) { | ||
| 219 | if (InspectVisitedBlocks(function_id, label)) { | ||
| 220 | // Label address has been visited | ||
| 221 | return; | ||
| 222 | } | ||
| 223 | // Try to find the next block | ||
| 224 | Function* const function{&functions[function_id]}; | ||
| 225 | Location pc{label.address}; | ||
| 226 | const auto next_it{function->blocks.upper_bound(pc, Compare{})}; | ||
| 227 | const bool is_last{next_it == function->blocks.end()}; | ||
| 228 | Block* const next{is_last ? nullptr : &*next_it}; | ||
| 229 | // Insert before the next block | ||
| 230 | Block* const block{label.block}; | ||
| 231 | // Analyze instructions until an already visited block is reached or a branch is found | ||
| 232 | bool is_branch{false}; | ||
| 233 | while (!next || pc < next->begin) { | ||
| 234 | is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch; | ||
| 235 | if (is_branch) { | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | ++pc; | ||
| 239 | } | ||
| 240 | if (!is_branch) { | ||
| 241 | // If the block finished without a branch, | ||
| 242 | // it means the next instruction has already been visited; jump to it | ||
| 243 | block->end = pc; | ||
| 244 | block->cond = IR::Condition{true}; | ||
| 245 | block->branch_true = next; | ||
| 246 | block->branch_false = nullptr; | ||
| 247 | } | ||
| 248 | // The function pointer may have been invalidated (the functions vector can grow), resolve it again | ||
| 249 | // Insert the new block | ||
| 250 | functions[function_id].blocks.insert(*block); | ||
| 251 | } | ||
| 252 | |||
| 253 | bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) { | ||
| 254 | const Location pc{label.address}; | ||
| 255 | Function& function{functions[function_id]}; | ||
| 256 | const auto it{ | ||
| 257 | std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })}; | ||
| 258 | if (it == function.blocks.end()) { | ||
| 259 | // Address has not been visited | ||
| 260 | return false; | ||
| 261 | } | ||
| 262 | Block* const visited_block{&*it}; | ||
| 263 | if (visited_block->begin == pc) { | ||
| 264 | throw LogicError("Dangling block"); | ||
| 265 | } | ||
| 266 | Block* const new_block{label.block}; | ||
| 267 | Split(visited_block, new_block, pc); | ||
| 268 | function.blocks.insert(it, *new_block); | ||
| 269 | return true; | ||
| 270 | } | ||
| 271 | |||
| 272 | CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) { | ||
| 273 | const Instruction inst{env.ReadInstruction(pc.Offset())}; | ||
| 274 | const Opcode opcode{Decode(inst.raw)}; | ||
| 275 | switch (opcode) { | ||
| 276 | case Opcode::BRA: | ||
| 277 | case Opcode::JMP: | ||
| 278 | case Opcode::RET: | ||
| 279 | if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { | ||
| 280 | return AnalysisState::Continue; | ||
| 281 | } | ||
| 282 | switch (opcode) { | ||
| 283 | case Opcode::BRA: | ||
| 284 | case Opcode::JMP: | ||
| 285 | AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode)); | ||
| 286 | break; | ||
| 287 | case Opcode::RET: | ||
| 288 | block->end_class = EndClass::Return; | ||
| 289 | break; | ||
| 290 | default: | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | block->end = pc; | ||
| 294 | return AnalysisState::Branch; | ||
| 295 | case Opcode::BRK: | ||
| 296 | case Opcode::CONT: | ||
| 297 | case Opcode::LONGJMP: | ||
| 298 | case Opcode::SYNC: { | ||
| 299 | if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) { | ||
| 300 | return AnalysisState::Continue; | ||
| 301 | } | ||
| 302 | const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))}; | ||
| 303 | block->branch_true = AddLabel(block, new_stack, stack_pc, function_id); | ||
| 304 | block->end = pc; | ||
| 305 | return AnalysisState::Branch; | ||
| 306 | } | ||
| 307 | case Opcode::KIL: { | ||
| 308 | const Predicate pred{inst.Pred()}; | ||
| 309 | const auto ir_pred{static_cast<IR::Pred>(pred.index)}; | ||
| 310 | const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated}; | ||
| 311 | AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond); | ||
| 312 | return AnalysisState::Branch; | ||
| 313 | } | ||
| 314 | case Opcode::PBK: | ||
| 315 | case Opcode::PCNT: | ||
| 316 | case Opcode::PEXIT: | ||
| 317 | case Opcode::PLONGJMP: | ||
| 318 | case Opcode::SSY: | ||
| 319 | block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst)); | ||
| 320 | return AnalysisState::Continue; | ||
| 321 | case Opcode::BRX: | ||
| 322 | case Opcode::JMX: | ||
| 323 | return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id); | ||
| 324 | case Opcode::EXIT: | ||
| 325 | return AnalyzeEXIT(block, function_id, pc, inst); | ||
| 326 | case Opcode::PRET: | ||
| 327 | throw NotImplementedException("PRET flow analysis"); | ||
| 328 | case Opcode::CAL: | ||
| 329 | case Opcode::JCAL: { | ||
| 330 | const bool is_absolute{IsAbsoluteJump(opcode)}; | ||
| 331 | const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; | ||
| 332 | // Technically CAL pushes into PRET, but that's implicit in the function call for us | ||
| 333 | // Insert the function into the list if it doesn't exist | ||
| 334 | const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)}; | ||
| 335 | const bool exists{it != functions.end()}; | ||
| 336 | const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it)) | ||
| 337 | : functions.size()}; | ||
| 338 | if (!exists) { | ||
| 339 | functions.emplace_back(block_pool, cal_pc); | ||
| 340 | } | ||
| 341 | block->end_class = EndClass::Call; | ||
| 342 | block->function_call = call_id; | ||
| 343 | block->return_block = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 344 | block->end = pc; | ||
| 345 | return AnalysisState::Branch; | ||
| 346 | } | ||
| 347 | default: | ||
| 348 | break; | ||
| 349 | } | ||
| 350 | const Predicate pred{inst.Pred()}; | ||
| 351 | if (pred == Predicate{true} || pred == Predicate{false}) { | ||
| 352 | return AnalysisState::Continue; | ||
| 353 | } | ||
| 354 | const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated}; | ||
| 355 | AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond); | ||
| 356 | return AnalysisState::Branch; | ||
| 357 | } | ||
| 358 | |||
| 359 | void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, | ||
| 360 | EndClass insn_end_class, IR::Condition cond) { | ||
| 361 | if (block->begin != pc) { | ||
| 362 | // If the block doesn't start at the conditional instruction, | ||
| 363 | // mark it as a label to visit it later | ||
| 364 | block->end = pc; | ||
| 365 | block->cond = IR::Condition{true}; | ||
| 366 | block->branch_true = AddLabel(block, block->stack, pc, function_id); | ||
| 367 | block->branch_false = nullptr; | ||
| 368 | return; | ||
| 369 | } | ||
| 370 | // Create a virtual block and a conditional block | ||
| 371 | Block* const conditional_block{block_pool.Create()}; | ||
| 372 | Block virtual_block{}; | ||
| 373 | virtual_block.begin = block->begin.Virtual(); | ||
| 374 | virtual_block.end = block->begin.Virtual(); | ||
| 375 | virtual_block.end_class = EndClass::Branch; | ||
| 376 | virtual_block.stack = block->stack; | ||
| 377 | virtual_block.cond = cond; | ||
| 378 | virtual_block.branch_true = conditional_block; | ||
| 379 | virtual_block.branch_false = nullptr; | ||
| 380 | // Save the contents of the visited block in the conditional block | ||
| 381 | *conditional_block = std::move(*block); | ||
| 382 | // Impersonate the visited block with a virtual block | ||
| 383 | *block = std::move(virtual_block); | ||
| 384 | // Set the end properties of the conditional instruction | ||
| 385 | conditional_block->end = pc + 1; | ||
| 386 | conditional_block->end_class = insn_end_class; | ||
| 387 | // Add a label to the instruction after the conditional instruction | ||
| 388 | Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)}; | ||
| 389 | // Branch to the next instruction from the virtual block | ||
| 390 | block->branch_false = endif_block; | ||
| 391 | // And branch to it from the conditional instruction if it is a branch or a kill instruction | ||
| 392 | // Kill instructions are considered a branch because they demote to a helper invocation and | ||
| 393 | // execution may continue. | ||
| 394 | if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) { | ||
| 395 | conditional_block->cond = IR::Condition{true}; | ||
| 396 | conditional_block->branch_true = endif_block; | ||
| 397 | conditional_block->branch_false = nullptr; | ||
| 398 | } | ||
| 399 | // Finally insert the condition block into the list of blocks | ||
| 400 | functions[function_id].blocks.insert(*conditional_block); | ||
| 401 | } | ||
| 402 | |||
| 403 | bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 404 | Opcode opcode) { | ||
| 405 | if (inst.branch.is_cbuf) { | ||
| 406 | throw NotImplementedException("Branch with constant buffer offset"); | ||
| 407 | } | ||
| 408 | const Predicate pred{inst.Pred()}; | ||
| 409 | if (pred == Predicate{false}) { | ||
| 410 | return false; | ||
| 411 | } | ||
| 412 | const bool has_flow_test{HasFlowTest(opcode)}; | ||
| 413 | const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T}; | ||
| 414 | if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { | ||
| 415 | block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated); | ||
| 416 | block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 417 | } else { | ||
| 418 | block->cond = IR::Condition{true}; | ||
| 419 | } | ||
| 420 | return true; | ||
| 421 | } | ||
| 422 | |||
| 423 | void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 424 | bool is_absolute) { | ||
| 425 | const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)}; | ||
| 426 | block->branch_true = AddLabel(block, block->stack, bra_pc, function_id); | ||
| 427 | } | ||
| 428 | |||
| 429 | CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, | ||
| 430 | FunctionId function_id) { | ||
| 431 | const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)}; | ||
| 432 | if (!brx_table) { | ||
| 434 | throw NotImplementedException("Failed to track indirect branch"); | ||
| 435 | } | ||
| 436 | const IR::FlowTest flow_test{inst.branch.flow_test}; | ||
| 437 | const Predicate pred{inst.Pred()}; | ||
| 438 | if (flow_test != IR::FlowTest::T || pred != Predicate{true}) { | ||
| 439 | throw NotImplementedException("Conditional indirect branch"); | ||
| 440 | } | ||
| 441 | std::vector<u32> targets; | ||
| 442 | targets.reserve(brx_table->num_entries); | ||
| 443 | for (u32 i = 0; i < brx_table->num_entries; ++i) { | ||
| 444 | u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)}; | ||
| 445 | if (!is_absolute) { | ||
| 446 | target += pc.Offset(); | ||
| 447 | } | ||
| 448 | target += static_cast<u32>(brx_table->branch_offset); | ||
| 449 | target += 8; | ||
| 450 | targets.push_back(target); | ||
| 451 | } | ||
| 452 | std::ranges::sort(targets); | ||
| 453 | targets.erase(std::unique(targets.begin(), targets.end()), targets.end()); | ||
| 454 | |||
| 455 | block->indirect_branches.reserve(targets.size()); | ||
| 456 | for (const u32 target : targets) { | ||
| 457 | Block* const branch{AddLabel(block, block->stack, target, function_id)}; | ||
| 458 | block->indirect_branches.push_back({ | ||
| 459 | .block = branch, | ||
| 460 | .address = target, | ||
| 461 | }); | ||
| 462 | } | ||
| 463 | block->cond = IR::Condition{true}; | ||
| 464 | block->end = pc + 1; | ||
| 465 | block->end_class = EndClass::IndirectBranch; | ||
| 466 | block->branch_reg = brx_table->branch_reg; | ||
| 467 | block->branch_offset = brx_table->branch_offset + 8; | ||
| 468 | if (!is_absolute) { | ||
| 469 | block->branch_offset += pc.Offset(); | ||
| 470 | } | ||
| 471 | return AnalysisState::Branch; | ||
| 472 | } | ||
| 473 | |||
| 474 | CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, | ||
| 475 | Instruction inst) { | ||
| 476 | const IR::FlowTest flow_test{inst.branch.flow_test}; | ||
| 477 | const Predicate pred{inst.Pred()}; | ||
| 478 | if (pred == Predicate{false} || flow_test == IR::FlowTest::F) { | ||
| 479 | // EXIT will never be taken | ||
| 480 | return AnalysisState::Continue; | ||
| 481 | } | ||
| 482 | if (exits_to_dispatcher && function_id != 0) { | ||
| 483 | throw NotImplementedException("Dispatch EXIT on external function"); | ||
| 484 | } | ||
| 485 | if (pred != Predicate{true} || flow_test != IR::FlowTest::T) { | ||
| 486 | if (block->stack.Peek(Token::PEXIT).has_value()) { | ||
| 487 | throw NotImplementedException("Conditional EXIT with PEXIT token"); | ||
| 488 | } | ||
| 489 | const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated}; | ||
| 490 | if (exits_to_dispatcher) { | ||
| 491 | block->end = pc; | ||
| 492 | block->end_class = EndClass::Branch; | ||
| 493 | block->cond = cond; | ||
| 494 | block->branch_true = dispatch_block; | ||
| 495 | block->branch_false = AddLabel(block, block->stack, pc + 1, function_id); | ||
| 496 | return AnalysisState::Branch; | ||
| 497 | } | ||
| 498 | AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond); | ||
| 499 | return AnalysisState::Branch; | ||
| 500 | } | ||
| 501 | if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) { | ||
| 502 | const Stack popped_stack{block->stack.Remove(Token::PEXIT)}; | ||
| 503 | block->cond = IR::Condition{true}; | ||
| 504 | block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id); | ||
| 505 | block->branch_false = nullptr; | ||
| 506 | return AnalysisState::Branch; | ||
| 507 | } | ||
| 508 | if (exits_to_dispatcher) { | ||
| 509 | block->cond = IR::Condition{true}; | ||
| 510 | block->end = pc; | ||
| 511 | block->end_class = EndClass::Branch; | ||
| 512 | block->branch_true = dispatch_block; | ||
| 513 | block->branch_false = nullptr; | ||
| 514 | return AnalysisState::Branch; | ||
| 515 | } | ||
| 516 | block->end = pc + 1; | ||
| 517 | block->end_class = EndClass::Exit; | ||
| 518 | return AnalysisState::Branch; | ||
| 519 | } | ||
| 520 | |||
| 521 | Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) { | ||
| 522 | Function& function{functions[function_id]}; | ||
| 523 | if (block->begin == pc) { | ||
| 524 | // Jumps to itself | ||
| 525 | return block; | ||
| 526 | } | ||
| 527 | if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) { | ||
| 528 | // Block already exists and it has been visited | ||
| 529 | if (function.blocks.begin() != it) { | ||
| 530 | // Check if the previous node is the virtual variant of the label | ||
| 531 | // This won't exist if a virtual node is not needed or it hasn't been visited | ||
| 532 | // If it hasn't been visited and a virtual node is needed, this will still behave as | ||
| 533 | // expected because the node is impersonated by its virtual node. | ||
| 534 | const auto prev{std::prev(it)}; | ||
| 535 | if (it->begin.Virtual() == prev->begin) { | ||
| 536 | return &*prev; | ||
| 537 | } | ||
| 538 | } | ||
| 539 | return &*it; | ||
| 540 | } | ||
| 541 | // Make sure we don't insert the same label twice | ||
| 542 | const auto label_it{std::ranges::find(function.labels, pc, &Label::address)}; | ||
| 543 | if (label_it != function.labels.end()) { | ||
| 544 | return label_it->block; | ||
| 545 | } | ||
| 546 | Block* const new_block{block_pool.Create()}; | ||
| 547 | new_block->begin = pc; | ||
| 548 | new_block->end = pc; | ||
| 549 | new_block->end_class = EndClass::Branch; | ||
| 550 | new_block->cond = IR::Condition(true); | ||
| 551 | new_block->stack = stack; | ||
| 552 | new_block->branch_true = nullptr; | ||
| 553 | new_block->branch_false = nullptr; | ||
| 554 | function.labels.push_back(Label{ | ||
| 555 | .address{pc}, | ||
| 556 | .block = new_block, | ||
| 557 | .stack{std::move(stack)}, | ||
| 558 | }); | ||
| 559 | return new_block; | ||
| 560 | } | ||
| 561 | |||
| 562 | std::string CFG::Dot() const { | ||
| 563 | int node_uid{0}; | ||
| 564 | |||
| 565 | std::string dot{"digraph shader {\n"}; | ||
| 566 | for (const Function& function : functions) { | ||
| 567 | dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint); | ||
| 568 | dot += fmt::format("\t\tnode [style=filled];\n"); | ||
| 569 | for (const Block& block : function.blocks) { | ||
| 570 | const std::string name{NameOf(block)}; | ||
| 571 | const auto add_branch = [&](Block* branch, bool add_label) { | ||
| 572 | dot += fmt::format("\t\t{}->{}", name, NameOf(*branch)); | ||
| 573 | if (add_label && block.cond != IR::Condition{true} && | ||
| 574 | block.cond != IR::Condition{false}) { | ||
| 575 | dot += fmt::format(" [label=\"{}\"]", block.cond); | ||
| 576 | } | ||
| 577 | dot += '\n'; | ||
| 578 | }; | ||
| 579 | dot += fmt::format("\t\t{};\n", name); | ||
| 580 | switch (block.end_class) { | ||
| 581 | case EndClass::Branch: | ||
| 582 | if (block.cond != IR::Condition{false}) { | ||
| 583 | add_branch(block.branch_true, true); | ||
| 584 | } | ||
| 585 | if (block.cond != IR::Condition{true}) { | ||
| 586 | add_branch(block.branch_false, false); | ||
| 587 | } | ||
| 588 | break; | ||
| 589 | case EndClass::IndirectBranch: | ||
| 590 | for (const IndirectBranch& branch : block.indirect_branches) { | ||
| 591 | add_branch(branch.block, false); | ||
| 592 | } | ||
| 593 | break; | ||
| 594 | case EndClass::Call: | ||
| 595 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 596 | dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block)); | ||
| 597 | dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n", | ||
| 598 | node_uid, block.function_call); | ||
| 599 | dot += '\n'; | ||
| 600 | ++node_uid; | ||
| 601 | break; | ||
| 602 | case EndClass::Exit: | ||
| 603 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 604 | dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n", | ||
| 605 | node_uid); | ||
| 606 | ++node_uid; | ||
| 607 | break; | ||
| 608 | case EndClass::Return: | ||
| 609 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 610 | dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n", | ||
| 611 | node_uid); | ||
| 612 | ++node_uid; | ||
| 613 | break; | ||
| 614 | case EndClass::Kill: | ||
| 615 | dot += fmt::format("\t\t{}->N{};\n", name, node_uid); | ||
| 616 | dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n", | ||
| 617 | node_uid); | ||
| 618 | ++node_uid; | ||
| 619 | break; | ||
| 620 | } | ||
| 621 | } | ||
| 622 | if (function.entrypoint == 8) { // program start: offset 0 aligned past the control word | ||
| 623 | dot += fmt::format("\t\tlabel = \"main\";\n"); | ||
| 624 | } else { | ||
| 625 | dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint); | ||
| 626 | } | ||
| 627 | dot += "\t}\n"; | ||
| 628 | } | ||
| 629 | if (!functions.empty()) { | ||
| 630 | auto& function{functions.front()}; | ||
| 631 | if (function.blocks.empty()) { | ||
| 632 | dot += "Start;\n"; | ||
| 633 | } else { | ||
| 634 | dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin())); | ||
| 635 | } | ||
| 636 | dot += fmt::format("\tStart [shape=diamond];\n"); | ||
| 637 | } | ||
| 638 | dot += "}\n"; | ||
| 639 | return dot; | ||
| 640 | } | ||
| 641 | |||
| 642 | } // namespace Shader::Maxwell::Flow | ||
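Dot() exists purely for debugging: each function becomes a Graphviz cluster and each block a node. A minimal sketch of how a caller could dump and render the graph, assuming the project headers above are included and that `env`, `block_pool`, and `start_address` come from the surrounding compiler:

    #include <fstream>

    // Write the CFG to disk; render offline with: dot -Tsvg cfg.dot -o cfg.svg
    void DumpCfg(Shader::Environment& env,
                 Shader::ObjectPool<Shader::Maxwell::Flow::Block>& block_pool,
                 Shader::Maxwell::Location start_address) {
        const Shader::Maxwell::Flow::CFG cfg{env, block_pool, start_address};
        std::ofstream{"cfg.dot"} << cfg.Dot();
    }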
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h new file mode 100644 index 000000000..a6bd3e196 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/control_flow.h | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <string> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include <boost/container/small_vector.hpp> | ||
| 14 | #include <boost/intrusive/set.hpp> | ||
| 15 | |||
| 16 | #include "shader_recompiler/environment.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/condition.h" | ||
| 18 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 19 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 20 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 21 | #include "shader_recompiler/object_pool.h" | ||
| 22 | |||
| 23 | namespace Shader::Maxwell::Flow { | ||
| 24 | |||
| 25 | struct Block; | ||
| 26 | |||
| 27 | using FunctionId = size_t; | ||
| 28 | |||
| 29 | enum class EndClass { | ||
| 30 | Branch, | ||
| 31 | IndirectBranch, | ||
| 32 | Call, | ||
| 33 | Exit, | ||
| 34 | Return, | ||
| 35 | Kill, | ||
| 36 | }; | ||
| 37 | |||
| 38 | enum class Token { | ||
| 39 | SSY, | ||
| 40 | PBK, | ||
| 41 | PEXIT, | ||
| 42 | PRET, | ||
| 43 | PCNT, | ||
| 44 | PLONGJMP, | ||
| 45 | }; | ||
| 46 | |||
| 47 | struct StackEntry { | ||
| 48 | auto operator<=>(const StackEntry&) const noexcept = default; | ||
| 49 | |||
| 50 | Token token; | ||
| 51 | Location target; | ||
| 52 | }; | ||
| 53 | |||
| 54 | class Stack { | ||
| 55 | public: | ||
| 56 | void Push(Token token, Location target); | ||
| 57 | [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const; | ||
| 58 | [[nodiscard]] std::optional<Location> Peek(Token token) const; | ||
| 59 | [[nodiscard]] Stack Remove(Token token) const; | ||
| 60 | |||
| 61 | private: | ||
| 62 | boost::container::small_vector<StackEntry, 3> entries; | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct IndirectBranch { | ||
| 66 | Block* block; | ||
| 67 | u32 address; | ||
| 68 | }; | ||
| 69 | |||
| 70 | struct Block : boost::intrusive::set_base_hook< | ||
| 71 | // Normal link is ~2.5% faster compared to safe link | ||
| 72 | boost::intrusive::link_mode<boost::intrusive::normal_link>> { | ||
| 73 | [[nodiscard]] bool Contains(Location pc) const noexcept; | ||
| 74 | |||
| 75 | bool operator<(const Block& rhs) const noexcept { | ||
| 76 | return begin < rhs.begin; | ||
| 77 | } | ||
| 78 | |||
| 79 | Location begin; | ||
| 80 | Location end; | ||
| 81 | EndClass end_class{}; | ||
| 82 | IR::Condition cond{}; | ||
| 83 | Stack stack; | ||
| 84 | Block* branch_true{}; | ||
| 85 | Block* branch_false{}; | ||
| 86 | FunctionId function_call{}; | ||
| 87 | Block* return_block{}; | ||
| 88 | IR::Reg branch_reg{}; | ||
| 89 | s32 branch_offset{}; | ||
| 90 | std::vector<IndirectBranch> indirect_branches; | ||
| 91 | }; | ||
| 92 | |||
| 93 | struct Label { | ||
| 94 | Location address; | ||
| 95 | Block* block; | ||
| 96 | Stack stack; | ||
| 97 | }; | ||
| 98 | |||
| 99 | struct Function { | ||
| 100 | explicit Function(ObjectPool<Block>& block_pool, Location start_address); | ||
| 101 | |||
| 102 | Location entrypoint; | ||
| 103 | boost::container::small_vector<Label, 16> labels; | ||
| 104 | boost::intrusive::set<Block> blocks; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class CFG { | ||
| 108 | enum class AnalysisState { | ||
| 109 | Branch, | ||
| 110 | Continue, | ||
| 111 | }; | ||
| 112 | |||
| 113 | public: | ||
| 114 | explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address, | ||
| 115 | bool exits_to_dispatcher = false); | ||
| 116 | |||
| 117 | CFG& operator=(const CFG&) = delete; | ||
| 118 | CFG(const CFG&) = delete; | ||
| 119 | |||
| 120 | CFG& operator=(CFG&&) = delete; | ||
| 121 | CFG(CFG&&) = delete; | ||
| 122 | |||
| 123 | [[nodiscard]] std::string Dot() const; | ||
| 124 | |||
| 125 | [[nodiscard]] std::span<const Function> Functions() const noexcept { | ||
| 126 | return std::span(functions.data(), functions.size()); | ||
| 127 | } | ||
| 128 | [[nodiscard]] std::span<Function> Functions() noexcept { | ||
| 129 | return std::span(functions.data(), functions.size()); | ||
| 130 | } | ||
| 131 | |||
| 132 | [[nodiscard]] bool ExitsToDispatcher() const { | ||
| 133 | return exits_to_dispatcher; | ||
| 134 | } | ||
| 135 | |||
| 136 | private: | ||
| 137 | void AnalyzeLabel(FunctionId function_id, Label& label); | ||
| 138 | |||
| 139 | /// Inspect already visited blocks. | ||
| 140 | /// Return true when the block has already been visited | ||
| 141 | bool InspectVisitedBlocks(FunctionId function_id, const Label& label); | ||
| 142 | |||
| 143 | AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc); | ||
| 144 | |||
| 145 | void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class, | ||
| 146 | IR::Condition cond); | ||
| 147 | |||
| 148 | /// Return true when the instruction is confirmed to be a branch | ||
| 149 | bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 150 | Opcode opcode); | ||
| 151 | |||
| 152 | void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst, | ||
| 153 | bool is_absolute); | ||
| 154 | AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute, | ||
| 155 | FunctionId function_id); | ||
| 156 | AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst); | ||
| 157 | |||
| 158 | /// Return the branch target block | ||
| 159 | Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id); | ||
| 160 | |||
| 161 | Environment& env; | ||
| 162 | ObjectPool<Block>& block_pool; | ||
| 163 | boost::container::small_vector<Function, 1> functions; | ||
| 164 | Location program_start; | ||
| 165 | bool exits_to_dispatcher{}; | ||
| 166 | Block* dispatch_block{}; | ||
| 167 | }; | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell::Flow | ||
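The Stack declared above models Maxwell's paired control-flow tokens: SSY pushes the reconvergence point that a later SYNC consumes, PBK pairs with BRK, PRET with RET, and so on. Pop returns a new Stack by value, so each analyzed block keeps an immutable snapshot of the stack it was entered with. An illustrative sketch; the 0x88 address is an assumption, any valid instruction offset works:

    using namespace Shader::Maxwell;

    Flow::Stack stack;
    stack.Push(Flow::Token::SSY, Location{0x88}); // SSY records where divergent paths rejoin
    // Later, a SYNC consumes the newest SSY entry:
    const auto [target, popped]{stack.Pop(Flow::Token::SSY)};
    // target.Offset() == 0x88; `popped` lacks the entry while `stack` is unchanged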
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp new file mode 100644 index 000000000..972f677dc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <bit> | ||
| 8 | #include <memory> | ||
| 9 | #include <string_view> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/exception.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 15 | |||
| 16 | namespace Shader::Maxwell { | ||
| 17 | namespace { | ||
| 18 | struct MaskValue { | ||
| 19 | u64 mask; | ||
| 20 | u64 value; | ||
| 21 | }; | ||
| 22 | |||
| 23 | constexpr MaskValue MaskValueFromEncoding(const char* encoding) { | ||
| 24 | u64 mask{}; | ||
| 25 | u64 value{}; | ||
| 26 | u64 bit{u64(1) << 63}; | ||
| 27 | while (*encoding) { | ||
| 28 | switch (*encoding) { | ||
| 29 | case '0': | ||
| 30 | mask |= bit; | ||
| 31 | break; | ||
| 32 | case '1': | ||
| 33 | mask |= bit; | ||
| 34 | value |= bit; | ||
| 35 | break; | ||
| 36 | case '-': | ||
| 37 | break; | ||
| 38 | case ' ': | ||
| 39 | break; | ||
| 40 | default: | ||
| 41 | throw LogicError("Invalid encoding character '{}'", *encoding); | ||
| 42 | } | ||
| 43 | ++encoding; | ||
| 44 | if (*encoding != ' ') { // spaces are cosmetic and never consume a bit position | ||
| 45 | bit >>= 1; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | return MaskValue{.mask = mask, .value = value}; | ||
| 49 | } | ||
| 50 | |||
| 51 | struct InstEncoding { | ||
| 52 | MaskValue mask_value; | ||
| 53 | Opcode opcode; | ||
| 54 | }; | ||
| 55 | constexpr std::array UNORDERED_ENCODINGS{ | ||
| 56 | #define INST(name, cute, encode) \ | ||
| 57 | InstEncoding{ \ | ||
| 58 | .mask_value{MaskValueFromEncoding(encode)}, \ | ||
| 59 | .opcode = Opcode::name, \ | ||
| 60 | }, | ||
| 61 | #include "maxwell.inc" | ||
| 62 | #undef INST | ||
| 63 | }; | ||
| 64 | |||
| 65 | constexpr auto SortedEncodings() { | ||
| 66 | std::array encodings{UNORDERED_ENCODINGS}; | ||
| 67 | std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) { | ||
| 68 | return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask); | ||
| 69 | }); | ||
| 70 | return encodings; | ||
| 71 | } | ||
| 72 | constexpr auto ENCODINGS{SortedEncodings()}; | ||
| 73 | |||
| 74 | constexpr int WidestLeftBits() { | ||
| 75 | int bits{64}; | ||
| 76 | for (const InstEncoding& encoding : ENCODINGS) { | ||
| 77 | bits = std::min(bits, std::countr_zero(encoding.mask_value.mask)); | ||
| 78 | } | ||
| 79 | return 64 - bits; | ||
| 80 | } | ||
| 81 | constexpr int WIDEST_LEFT_BITS{WidestLeftBits()}; | ||
| 82 | constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS}; | ||
| 83 | |||
| 84 | constexpr size_t ToFastLookupIndex(u64 value) { | ||
| 85 | return static_cast<size_t>(value >> MASK_SHIFT); | ||
| 86 | } | ||
| 87 | |||
| 88 | constexpr size_t FastLookupSize() { | ||
| 89 | size_t max_width{}; | ||
| 90 | for (const InstEncoding& encoding : ENCODINGS) { | ||
| 91 | max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask)); | ||
| 92 | } | ||
| 93 | return max_width + 1; | ||
| 94 | } | ||
| 95 | constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()}; | ||
| 96 | |||
| 97 | struct InstInfo { | ||
| 98 | [[nodiscard]] u64 Mask() const noexcept { | ||
| 99 | return static_cast<u64>(high_mask) << MASK_SHIFT; | ||
| 100 | } | ||
| 101 | |||
| 102 | [[nodiscard]] u64 Value() const noexcept { | ||
| 103 | return static_cast<u64>(high_value) << MASK_SHIFT; | ||
| 104 | } | ||
| 105 | |||
| 106 | u16 high_mask; | ||
| 107 | u16 high_value; | ||
| 108 | Opcode opcode; | ||
| 109 | }; | ||
| 110 | |||
| 111 | constexpr auto MakeFastLookupTableIndex(size_t index) { | ||
| 112 | std::array<InstInfo, 2> encodings{}; | ||
| 113 | size_t element{}; | ||
| 114 | for (const auto& encoding : ENCODINGS) { | ||
| 115 | const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)}; | ||
| 116 | const size_t value{ToFastLookupIndex(encoding.mask_value.value)}; | ||
| 117 | if ((index & mask) == value) { | ||
| 118 | encodings.at(element) = InstInfo{ | ||
| 119 | .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT), | ||
| 120 | .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT), | ||
| 121 | .opcode = encoding.opcode, | ||
| 122 | }; | ||
| 123 | ++element; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | return encodings; | ||
| 127 | } | ||
| 128 | |||
| 129 | /*constexpr*/ auto MakeFastLookupTable() { // heap-allocated result cannot constant-evaluate | ||
| 130 | auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()}; | ||
| 131 | for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) { | ||
| 132 | (*encodings)[index] = MakeFastLookupTableIndex(index); | ||
| 133 | } | ||
| 134 | return encodings; | ||
| 135 | } | ||
| 136 | const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()}; | ||
| 137 | } // Anonymous namespace | ||
| 138 | |||
| 139 | Opcode Decode(u64 insn) { | ||
| 140 | const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]}; | ||
| 141 | const auto it{std::ranges::find_if( | ||
| 142 | table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })}; | ||
| 143 | if (it == table.end()) { | ||
| 144 | throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn); | ||
| 145 | } | ||
| 146 | return it->opcode; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace Shader::Maxwell | ||
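Decoding is two-level: the top WIDEST_LEFT_BITS bits of the instruction select a bucket of at most two candidate encodings (filled in descending mask popcount, so the most specific candidate is tried first), and a full mask/value compare settles the match; unknown words throw instead of decoding to garbage. A small illustration using the EXIT entry from maxwell.inc:

    // "1110 0011 0000 ----" with all don't-care bits cleared
    const u64 raw{0xE300'0000'0000'0000};
    const Shader::Maxwell::Opcode op{Shader::Maxwell::Decode(raw)};
    // op == Shader::Maxwell::Opcode::EXIT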
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h new file mode 100644 index 000000000..b4f080fd7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/decode.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | [[nodiscard]] Opcode Decode(u64 insn); | ||
| 13 | |||
| 14 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp new file mode 100644 index 000000000..008625cb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 12 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | namespace { | ||
| 16 | union Encoding { | ||
| 17 | u64 raw; | ||
| 18 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 19 | BitField<8, 8, IR::Reg> src_reg; | ||
| 20 | BitField<20, 19, u64> immediate; | ||
| 21 | BitField<56, 1, u64> is_negative; | ||
| 22 | BitField<20, 24, s64> brx_offset; | ||
| 23 | }; | ||
| 24 | |||
| 25 | template <typename Callable> | ||
| 26 | std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) { | ||
| 27 | while (pos >= block_begin) { | ||
| 28 | const u64 insn{env.ReadInstruction(pos.Offset())}; | ||
| 29 | --pos; | ||
| 30 | if (func(insn, Decode(insn))) { | ||
| 31 | return insn; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | return std::nullopt; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos, | ||
| 38 | IR::Reg brx_reg) { | ||
| 39 | return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) { | ||
| 40 | const LDC::Encoding ldc{insn}; | ||
| 41 | return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 && | ||
| 42 | ldc.mode == LDC::Mode::Default; | ||
| 43 | }); | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos, | ||
| 47 | IR::Reg ldc_reg) { | ||
| 48 | return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) { | ||
| 49 | const Encoding shl{insn}; | ||
| 50 | return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg; | ||
| 51 | }); | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos, | ||
| 55 | IR::Reg shl_reg) { | ||
| 56 | return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) { | ||
| 57 | const Encoding imnmx{insn}; | ||
| 58 | return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg; | ||
| 59 | }); | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, | ||
| 64 | Location block_begin) { | ||
| 65 | const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())}; | ||
| 66 | const Opcode brx_opcode{Decode(brx_insn)}; | ||
| 67 | if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) { | ||
| 68 | throw LogicError("Tracked instruction is not BRX or JMX"); | ||
| 69 | } | ||
| 70 | const IR::Reg brx_reg{Encoding{brx_insn}.src_reg}; | ||
| 71 | const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)}; | ||
| 72 | |||
| 73 | Location pos{brx_pos}; | ||
| 74 | const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)}; | ||
| 75 | if (!ldc_insn) { | ||
| 76 | return std::nullopt; | ||
| 77 | } | ||
| 78 | const LDC::Encoding ldc{*ldc_insn}; | ||
| 79 | const u32 cbuf_index{static_cast<u32>(ldc.index)}; | ||
| 80 | const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))}; | ||
| 81 | const IR::Reg ldc_reg{ldc.src_reg}; | ||
| 82 | |||
| 83 | const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)}; | ||
| 84 | if (!shl_insn) { | ||
| 85 | return std::nullopt; | ||
| 86 | } | ||
| 87 | const Encoding shl{*shl_insn}; | ||
| 88 | const IR::Reg shl_reg{shl.src_reg}; | ||
| 89 | |||
| 90 | const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)}; | ||
| 91 | if (!imnmx_insn) { | ||
| 92 | return std::nullopt; | ||
| 93 | } | ||
| 94 | const Encoding imnmx{*imnmx_insn}; | ||
| 95 | if (imnmx.is_negative != 0) { | ||
| 96 | return std::nullopt; | ||
| 97 | } | ||
| 98 | const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())}; | ||
| 99 | return IndirectBranchTableInfo{ | ||
| 100 | .cbuf_index = cbuf_index, | ||
| 101 | .cbuf_offset = cbuf_offset, | ||
| 102 | .num_entries = imnmx_immediate + 1, | ||
| 103 | .branch_offset = brx_offset, | ||
| 104 | .branch_reg = brx_reg, | ||
| 105 | }; | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
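The tracker walks backwards from the BRX/JMX through the idiom guest compilers emit for jump tables. A hedged consumer sketch; the register numbers in the comment are illustrative, and `env`, `brx_pos`, and `block_begin` are assumed to come from the caller:

    // Expected guest pattern, oldest instruction first:
    //   IMNMX R2, R2, num_entries - 1, PT    clamp the table index
    //   SHL   R2, R2, 2                      scale the index to a byte offset
    //   LDC   R2, c[cbuf_index][cbuf_offset + R2]
    //   BRX   R2 [+ branch_offset]
    if (const auto table{Shader::Maxwell::TrackIndirectBranchTable(env, brx_pos, block_begin)}) {
        for (u32 i = 0; i < table->num_entries; ++i) {
            // Branch target i lives at c[table->cbuf_index][table->cbuf_offset + i * 4]
        }
    }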
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h new file mode 100644 index 000000000..eee5102fa --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/environment.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 14 | |||
| 15 | namespace Shader::Maxwell { | ||
| 16 | |||
| 17 | struct IndirectBranchTableInfo { | ||
| 18 | u32 cbuf_index{}; | ||
| 19 | u32 cbuf_offset{}; | ||
| 20 | u32 num_entries{}; | ||
| 21 | s32 branch_offset{}; | ||
| 22 | IR::Reg branch_reg{}; | ||
| 23 | }; | ||
| 24 | |||
| 25 | std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos, | ||
| 26 | Location block_begin); | ||
| 27 | |||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h new file mode 100644 index 000000000..743d68d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/instruction.h | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/flow_test.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | struct Predicate { | ||
| 15 | Predicate() = default; | ||
| 16 | Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {} | ||
| 17 | Predicate(bool value) : index{7}, negated{!value} {} | ||
| 18 | Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {} | ||
| 19 | |||
| 20 | unsigned index; | ||
| 21 | bool negated; | ||
| 22 | }; | ||
| 23 | |||
| 24 | inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept { | ||
| 25 | return lhs.index == rhs.index && lhs.negated == rhs.negated; | ||
| 26 | } | ||
| 27 | |||
| 28 | inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept { | ||
| 29 | return !(lhs == rhs); | ||
| 30 | } | ||
| 31 | |||
| 32 | union Instruction { | ||
| 33 | Instruction(u64 raw_) : raw{raw_} {} | ||
| 34 | |||
| 35 | u64 raw; | ||
| 36 | |||
| 37 | union { | ||
| 38 | BitField<5, 1, u64> is_cbuf; | ||
| 39 | BitField<0, 5, IR::FlowTest> flow_test; | ||
| 40 | |||
| 41 | [[nodiscard]] u32 Absolute() const noexcept { | ||
| 42 | return static_cast<u32>(absolute); | ||
| 43 | } | ||
| 44 | |||
| 45 | [[nodiscard]] s32 Offset() const noexcept { | ||
| 46 | return static_cast<s32>(offset); | ||
| 47 | } | ||
| 48 | |||
| 49 | private: | ||
| 50 | BitField<20, 24, s64> offset; | ||
| 51 | BitField<20, 32, u64> absolute; | ||
| 52 | } branch; | ||
| 53 | |||
| 54 | [[nodiscard]] Predicate Pred() const noexcept { | ||
| 55 | return Predicate{pred}; | ||
| 56 | } | ||
| 57 | |||
| 58 | private: | ||
| 59 | BitField<16, 4, u64> pred; | ||
| 60 | }; | ||
| 61 | static_assert(std::is_trivially_copyable_v<Instruction>); | ||
| 62 | |||
| 63 | } // namespace Shader::Maxwell | ||
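Instruction is a thin bitfield view over the raw 64-bit word; only the fields the control-flow analyzer needs live here, while per-instruction views belong to the translator. A short extraction sketch; the raw value below is a hand-built unconditional BRA and is an assumption, not captured hardware output:

    // BRA opcode bits plus predicate PT (index 7, not negated) and a zero offset
    const u64 raw{0xE240'0000'0007'0000};
    const Shader::Maxwell::Instruction inst{raw};
    const Shader::Maxwell::Predicate pred{inst.Pred()}; // pred.index == 7, pred.negated == false
    const s32 offset{inst.branch.Offset()};             // 0: the analyzer resolves relative targets as pc + 8 + offset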
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h new file mode 100644 index 000000000..26d29eae2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/location.h | |||
| @@ -0,0 +1,112 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <compare> | ||
| 8 | #include <iterator> | ||
| 9 | |||
| 10 | #include <fmt/format.h> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/exception.h" | ||
| 14 | |||
| 15 | namespace Shader::Maxwell { | ||
| 16 | |||
| 17 | class Location { | ||
| 18 | static constexpr u32 VIRTUAL_BIAS{4}; // virtual locations sit 4 bytes before the real one | ||
| 19 | |||
| 20 | public: | ||
| 21 | constexpr Location() = default; | ||
| 22 | |||
| 23 | constexpr Location(u32 initial_offset) : offset{initial_offset} { | ||
| 24 | if (initial_offset % 8 != 0) { | ||
| 25 | throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset); | ||
| 26 | } | ||
| 27 | Align(); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr Location Virtual() const noexcept { | ||
| 31 | Location virtual_location; | ||
| 32 | virtual_location.offset = offset - VIRTUAL_BIAS; | ||
| 33 | return virtual_location; | ||
| 34 | } | ||
| 35 | |||
| 36 | [[nodiscard]] constexpr u32 Offset() const noexcept { | ||
| 37 | return offset; | ||
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] constexpr bool IsVirtual() const { | ||
| 41 | return offset % 8 == VIRTUAL_BIAS; | ||
| 42 | } | ||
| 43 | |||
| 44 | constexpr auto operator<=>(const Location&) const noexcept = default; | ||
| 45 | |||
| 46 | constexpr Location operator++() noexcept { | ||
| 47 | Step(); | ||
| 48 | return *this; | ||
| 49 | } | ||
| 50 | |||
| 51 | constexpr Location operator++(int) noexcept { | ||
| 52 | const Location copy{*this}; | ||
| 53 | Step(); | ||
| 54 | return copy; | ||
| 55 | } | ||
| 56 | |||
| 57 | constexpr Location operator--() noexcept { | ||
| 58 | Back(); | ||
| 59 | return *this; | ||
| 60 | } | ||
| 61 | |||
| 62 | constexpr Location operator--(int) noexcept { | ||
| 63 | const Location copy{*this}; | ||
| 64 | Back(); | ||
| 65 | return copy; | ||
| 66 | } | ||
| 67 | |||
| 68 | constexpr Location operator+(int number) const { | ||
| 69 | Location new_pc{*this}; | ||
| 70 | while (number > 0) { | ||
| 71 | --number; | ||
| 72 | ++new_pc; | ||
| 73 | } | ||
| 74 | while (number < 0) { | ||
| 75 | ++number; | ||
| 76 | --new_pc; | ||
| 77 | } | ||
| 78 | return new_pc; | ||
| 79 | } | ||
| 80 | |||
| 81 | constexpr Location operator-(int number) const { | ||
| 82 | return operator+(-number); | ||
| 83 | } | ||
| 84 | |||
| 85 | private: | ||
| 86 | constexpr void Align() { | ||
| 87 | offset += offset % 32 == 0 ? 8 : 0; // skip the control word heading each 32-byte bundle | ||
| 88 | } | ||
| 89 | |||
| 90 | constexpr void Step() { | ||
| 91 | offset += 8 + (offset % 32 == 24 ? 8 : 0); // skip the next bundle's control word | ||
| 92 | } | ||
| 93 | |||
| 94 | constexpr void Back() { | ||
| 95 | offset -= 8 + (offset % 32 == 8 ? 8 : 0); // skip back over the previous control word | ||
| 96 | } | ||
| 97 | |||
| 98 | u32 offset{0xcccccccc}; | ||
| 99 | }; | ||
| 100 | |||
| 101 | } // namespace Shader::Maxwell | ||
| 102 | |||
| 103 | template <> | ||
| 104 | struct fmt::formatter<Shader::Maxwell::Location> { | ||
| 105 | constexpr auto parse(format_parse_context& ctx) { | ||
| 106 | return ctx.begin(); | ||
| 107 | } | ||
| 108 | template <typename FormatContext> | ||
| 109 | auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) { | ||
| 110 | return fmt::format_to(ctx.out(), "{:04x}", location.Offset()); | ||
| 111 | } | ||
| 112 | }; | ||
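Location steps at instruction granularity while transparently skipping the scheduling control word that heads every 32-byte Maxwell bundle, and Virtual() reserves the otherwise impossible pc - 4 slot for synthetic blocks. A worked example of the stepping rules above:

    Shader::Maxwell::Location pc{0}; // 0 is a control word, so Align() lands on 8
    ++pc;                            // 0x10
    ++pc;                            // 0x18, the last instruction of this bundle
    ++pc;                            // 0x28: Step() skipped the control word at 0x20
    // pc.Offset() == 0x28 and pc.Virtual().Offset() == 0x24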
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc new file mode 100644 index 000000000..2fee591bb --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc | |||
| @@ -0,0 +1,286 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | INST(AL2P, "AL2P", "1110 1111 1010 0---") | ||
| 6 | INST(ALD, "ALD", "1110 1111 1101 1---") | ||
| 7 | INST(AST, "AST", "1110 1111 1111 0---") | ||
| 8 | INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----") | ||
| 9 | INST(ATOM, "ATOM", "1110 1101 ---- ----") | ||
| 10 | INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----") | ||
| 11 | INST(ATOMS, "ATOMS", "1110 1100 ---- ----") | ||
| 12 | INST(B2R, "B2R", "1111 0000 1011 1---") | ||
| 13 | INST(BAR, "BAR", "1111 0000 1010 1---") | ||
| 14 | INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---") | ||
| 15 | INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---") | ||
| 16 | INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---") | ||
| 17 | INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---") | ||
| 18 | INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---") | ||
| 19 | INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---") | ||
| 20 | INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---") | ||
| 21 | INST(BPT, "BPT", "1110 0011 1010 ----") | ||
| 22 | INST(BRA, "BRA", "1110 0010 0100 ----") | ||
| 23 | INST(BRK, "BRK", "1110 0011 0100 ----") | ||
| 24 | INST(BRX, "BRX", "1110 0010 0101 ----") | ||
| 25 | INST(CAL, "CAL", "1110 0010 0110 ----") | ||
| 26 | INST(CCTL, "CCTL", "1110 1111 011- ----") | ||
| 27 | INST(CCTLL, "CCTLL", "1110 1111 100- ----") | ||
| 28 | INST(CONT, "CONT", "1110 0011 0101 ----") | ||
| 29 | INST(CS2R, "CS2R", "0101 0000 1100 1---") | ||
| 30 | INST(CSET, "CSET", "0101 0000 1001 1---") | ||
| 31 | INST(CSETP, "CSETP", "0101 0000 1010 0---") | ||
| 32 | INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---") | ||
| 33 | INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---") | ||
| 34 | INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---") | ||
| 35 | INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---") | ||
| 36 | INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----") | ||
| 37 | INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----") | ||
| 38 | INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----") | ||
| 39 | INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----") | ||
| 40 | INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---") | ||
| 41 | INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---") | ||
| 42 | INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---") | ||
| 43 | INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---") | ||
| 44 | INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---") | ||
| 45 | INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---") | ||
| 46 | INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----") | ||
| 47 | INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----") | ||
| 48 | INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----") | ||
| 49 | INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----") | ||
| 50 | INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----") | ||
| 51 | INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----") | ||
| 52 | INST(EXIT, "EXIT", "1110 0011 0000 ----") | ||
| 53 | INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---") | ||
| 54 | INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---") | ||
| 55 | INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---") | ||
| 56 | INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---") | ||
| 57 | INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---") | ||
| 58 | INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---") | ||
| 59 | INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---") | ||
| 60 | INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---") | ||
| 61 | INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---") | ||
| 62 | INST(FADD32I, "FADD32I", "0000 10-- ---- ----") | ||
| 63 | INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---") | ||
| 64 | INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---") | ||
| 65 | INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---") | ||
| 66 | INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----") | ||
| 67 | INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----") | ||
| 68 | INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----") | ||
| 69 | INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----") | ||
| 70 | INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----") | ||
| 71 | INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----") | ||
| 72 | INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----") | ||
| 73 | INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----") | ||
| 74 | INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----") | ||
| 75 | INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---") | ||
| 76 | INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---") | ||
| 77 | INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---") | ||
| 78 | INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---") | ||
| 79 | INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---") | ||
| 80 | INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---") | ||
| 81 | INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---") | ||
| 82 | INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---") | ||
| 83 | INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---") | ||
| 84 | INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----") | ||
| 85 | INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----") | ||
| 86 | INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----") | ||
| 87 | INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----") | ||
| 88 | INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----") | ||
| 89 | INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----") | ||
| 90 | INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----") | ||
| 91 | INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---") | ||
| 92 | INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----") | ||
| 93 | INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----") | ||
| 94 | INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---") | ||
| 95 | INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----") | ||
| 96 | INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----") | ||
| 97 | INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----") | ||
| 98 | INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---") | ||
| 99 | INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") | ||
| 100 | INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") | ||
| 101 | INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") | ||
| 102 | INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") | ||
| 103 | INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") | ||
| 104 | INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") | ||
| 105 | INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") | ||
| 106 | INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") | ||
| 107 | INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") | ||
| 108 | INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") | ||
| 109 | INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") | ||
| 110 | INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") | ||
| 111 | INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") | ||
| 112 | INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") | ||
| 113 | INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") | ||
| 114 | INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") | ||
| 115 | INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") | ||
| 116 | INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") | ||
| 117 | INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") | ||
| 118 | INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") | ||
| 119 | INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") | ||
| 120 | INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") | ||
| 121 | INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") | ||
| 122 | INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") | ||
| 123 | INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") | ||
| 124 | INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") | ||
| 125 | INST(IADD32I, "IADD32I", "0001 110- ---- ----") | ||
| 126 | INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") | ||
| 127 | INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") | ||
| 128 | INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") | ||
| 129 | INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") | ||
| 130 | INST(IDE, "IDE", "1110 0011 1001 ----") | ||
| 131 | INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") | ||
| 132 | INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") | ||
| 133 | INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") | ||
| 134 | INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") | ||
| 135 | INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") | ||
| 136 | INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") | ||
| 137 | INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") | ||
| 138 | INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") | ||
| 139 | INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") | ||
| 140 | INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") | ||
| 141 | INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") | ||
| 142 | INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") | ||
| 143 | INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") | ||
| 144 | INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") | ||
| 145 | INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") | ||
| 146 | INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") | ||
| 147 | INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") | ||
| 148 | INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") | ||
| 149 | INST(IPA, "IPA", "1110 0000 ---- ----") | ||
| 150 | INST(ISBERD, "ISBERD", "1110 1111 1101 0---") | ||
| 151 | INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") | ||
| 152 | INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") | ||
| 153 | INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") | ||
| 154 | INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") | ||
| 155 | INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") | ||
| 156 | INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") | ||
| 157 | INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") | ||
| 158 | INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") | ||
| 159 | INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") | ||
| 160 | INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") | ||
| 161 | INST(JCAL, "JCAL", "1110 0010 0010 ----") | ||
| 162 | INST(JMP, "JMP", "1110 0010 0001 ----") | ||
| 163 | INST(JMX, "JMX", "1110 0010 0000 ----") | ||
| 164 | INST(KIL, "KIL", "1110 0011 0011 ----") | ||
| 165 | INST(LD, "LD", "100- ---- ---- ----") | ||
| 166 | INST(LDC, "LDC", "1110 1111 1001 0---") | ||
| 167 | INST(LDG, "LDG", "1110 1110 1101 0---") | ||
| 168 | INST(LDL, "LDL", "1110 1111 0100 0---") | ||
| 169 | INST(LDS, "LDS", "1110 1111 0100 1---") | ||
| 170 | INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") | ||
| 171 | INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") | ||
| 172 | INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") | ||
| 173 | INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") | ||
| 174 | INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") | ||
| 175 | INST(LEPC, "LEPC", "0101 0000 1101 0---") | ||
| 176 | INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") | ||
| 177 | INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") | ||
| 178 | INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") | ||
| 179 | INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") | ||
| 180 | INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") | ||
| 181 | INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") | ||
| 182 | INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") | ||
| 183 | INST(LOP32I, "LOP32I", "0000 01-- ---- ----") | ||
| 184 | INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") | ||
| 185 | INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") | ||
| 186 | INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") | ||
| 187 | INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") | ||
| 188 | INST(MOV32I, "MOV32I", "0000 0001 0000 ----") | ||
| 189 | INST(MUFU, "MUFU", "0101 0000 1000 0---") | ||
| 190 | INST(NOP, "NOP", "0101 0000 1011 0---") | ||
| 191 | INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") | ||
| 192 | INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") | ||
| 193 | INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") | ||
| 194 | INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") | ||
| 195 | INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") | ||
| 196 | INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") | ||
| 197 | INST(PBK, "PBK", "1110 0010 1010 ----") | ||
| 198 | INST(PCNT, "PCNT", "1110 0010 1011 ----") | ||
| 199 | INST(PEXIT, "PEXIT", "1110 0010 0011 ----") | ||
| 200 | INST(PIXLD, "PIXLD", "1110 1111 1110 1---") | ||
| 201 | INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") | ||
| 202 | INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") | ||
| 203 | INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") | ||
| 204 | INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") | ||
| 205 | INST(PRET, "PRET", "1110 0010 0111 ----") | ||
| 206 | INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") | ||
| 207 | INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") | ||
| 208 | INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") | ||
| 209 | INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") | ||
| 210 | INST(PSET, "PSET", "0101 0000 1000 1---") | ||
| 211 | INST(PSETP, "PSETP", "0101 0000 1001 0---") | ||
| 212 | INST(R2B, "R2B", "1111 0000 1100 0---") | ||
| 213 | INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") | ||
| 214 | INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") | ||
| 215 | INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") | ||
| 216 | INST(RAM, "RAM", "1110 0011 1000 ----") | ||
| 217 | INST(RED, "RED", "1110 1011 1111 1---") | ||
| 218 | INST(RET, "RET", "1110 0011 0010 ----") | ||
| 219 | INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") | ||
| 220 | INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") | ||
| 221 | INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") | ||
| 222 | INST(RTT, "RTT", "1110 0011 0110 ----") | ||
| 223 | INST(S2R, "S2R", "1111 0000 1100 1---") | ||
| 224 | INST(SAM, "SAM", "1110 0011 0111 ----") | ||
| 225 | INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") | ||
| 226 | INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") | ||
| 227 | INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") | ||
| 228 | INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") | ||
| 229 | INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") | ||
| 230 | INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") | ||
| 231 | INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") | ||
| 232 | INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") | ||
| 233 | INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") | ||
| 234 | INST(SHFL, "SHFL", "1110 1111 0001 0---") | ||
| 235 | INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") | ||
| 236 | INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") | ||
| 237 | INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") | ||
| 238 | INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") | ||
| 239 | INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") | ||
| 240 | INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") | ||
| 241 | INST(SSY, "SSY", "1110 0010 1001 ----") | ||
| 242 | INST(ST, "ST", "101- ---- ---- ----") | ||
| 243 | INST(STG, "STG", "1110 1110 1101 1---") | ||
| 244 | INST(STL, "STL", "1110 1111 0101 0---") | ||
| 245 | INST(STP, "STP", "1110 1110 1010 0---") | ||
| 246 | INST(STS, "STS", "1110 1111 0101 1---") | ||
| 247 | INST(SUATOM, "SUATOM", "1110 1010 0--- ----") | ||
| 248 | INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") | ||
| 249 | INST(SULD, "SULD", "1110 1011 000- ----") | ||
| 250 | INST(SURED, "SURED", "1110 1011 010- ----") | ||
| 251 | INST(SUST, "SUST", "1110 1011 001- ----") | ||
| 252 | INST(SYNC, "SYNC", "1111 0000 1111 1---") | ||
| 253 | INST(TEX, "TEX", "1100 0--- ---- ----") | ||
| 254 | INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") | ||
| 255 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | ||
| 256 | INST(TLD, "TLD", "1101 1100 ---- ----") | ||
| 257 | INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") | ||
| 258 | INST(TLD4, "TLD4", "1100 10-- ---- ----") | ||
| 259 | INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") | ||
| 260 | INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") | ||
| 261 | INST(TLDS, "TLDS", "1101 -01- ---- ----") | ||
| 262 | INST(TMML, "TMML", "1101 1111 0101 1---") | ||
| 263 | INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") | ||
| 264 | INST(TXA, "TXA", "1101 1111 0100 0---") | ||
| 265 | INST(TXD, "TXD", "1101 1110 00-- ----") | ||
| 266 | INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") | ||
| 267 | INST(TXQ, "TXQ", "1101 1111 0100 1---") | ||
| 268 | INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") | ||
| 269 | INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") | ||
| 270 | INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") | ||
| 271 | INST(VADD, "VADD", "0010 00-- ---- ----") | ||
| 272 | INST(VMAD, "VMAD", "0101 1111 ---- ----") | ||
| 273 | INST(VMNMX, "VMNMX", "0011 101- ---- ----") | ||
| 274 | INST(VOTE, "VOTE", "0101 0000 1101 1---") | ||
| 275 | INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") | ||
| 276 | INST(VSET, "VSET", "0100 000- ---- ----") | ||
| 277 | INST(VSETP, "VSETP", "0101 0000 1111 0---") | ||
| 278 | INST(VSHL, "VSHL", "0101 0111 ---- ----") | ||
| 279 | INST(VSHR, "VSHR", "0101 0110 ---- ----") | ||
| 280 | INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") | ||
| 281 | INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") | ||
| 282 | INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") | ||
| 283 | INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") | ||
| 284 | |||
| 285 | // Removed because its encoding fixes a low bit, which would enlarge the fast lookup table | ||
| 286 | // INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") | ||
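This file is an x-macro database: every INST(name, display_name, encoding) row expands into an Opcode enumerator (opcodes.h), a NameOf() string (opcodes.cpp), and a mask/value pair for the decoder (decode.cpp), so one edit here wires up all three. Encodings spell out the top 16 bits, most significant first; '-' marks don't-care bits and spaces are cosmetic. A purely hypothetical row for illustration; the mnemonic and bit pattern are invented and match no real Maxwell instruction:

    INST(FROB, "FROB (fake)", "1111 0000 0000 0---")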
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 000000000..ccc40c20c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | constexpr std::array NAME_TABLE{ | ||
| 13 | #define INST(name, cute, encode) cute, | ||
| 14 | #include "maxwell.inc" | ||
| 15 | #undef INST | ||
| 16 | }; | ||
| 17 | } // Anonymous namespace | ||
| 18 | |||
| 19 | const char* NameOf(Opcode opcode) { | ||
| 20 | if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { | ||
| 21 | throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); | ||
| 22 | } | ||
| 23 | return NAME_TABLE[static_cast<size_t>(opcode)]; | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | |||
| 11 | enum class Opcode { | ||
| 12 | #define INST(name, cute, encode) name, | ||
| 13 | #include "maxwell.inc" | ||
| 14 | #undef INST | ||
| 15 | }; | ||
| 16 | |||
| 17 | const char* NameOf(Opcode opcode); | ||
| 18 | |||
| 19 | } // namespace Shader::Maxwell | ||
| 20 | |||
| 21 | template <> | ||
| 22 | struct fmt::formatter<Shader::Maxwell::Opcode> { | ||
| 23 | constexpr auto parse(format_parse_context& ctx) { | ||
| 24 | return ctx.begin(); | ||
| 25 | } | ||
| 26 | template <typename FormatContext> | ||
| 27 | auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { | ||
| 28 | return fmt::format_to(ctx.out(), "{}", NameOf(opcode)); | ||
| 29 | } | ||
| 30 | }; | ||
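With the specialization above, opcodes interpolate directly into fmt strings, which the exception messages and the CFG dumper rely on. For instance (illustrative):

    const std::string text{fmt::format("decoded {}", Shader::Maxwell::Opcode::EXIT)};
    // text == "decoded EXIT"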
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..8b3e0a15c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | |||
| @@ -0,0 +1,883 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <string> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include <version> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include <boost/intrusive/list.hpp> | ||
| 16 | |||
| 17 | #include "shader_recompiler/environment.h" | ||
| 18 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 19 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 20 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | ||
| 24 | |||
| 25 | namespace Shader::Maxwell { | ||
| 26 | namespace { | ||
| 27 | struct Statement; | ||
| 28 | |||
| 29 | // Use normal_link because we are not guaranteed to destroy the tree in order | ||
| 30 | using ListBaseHook = | ||
| 31 | boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; | ||
| 32 | |||
| 33 | using Tree = boost::intrusive::list<Statement, | ||
| 34 | // Allow using Statement without a definition | ||
| 35 | boost::intrusive::base_hook<ListBaseHook>, | ||
| 36 | // Avoid linear complexity on splice, size is never called | ||
| 37 | boost::intrusive::constant_time_size<false>>; | ||
| 38 | using Node = Tree::iterator; | ||
| 39 | |||
| 40 | enum class StatementType { | ||
| 41 | Code, | ||
| 42 | Goto, | ||
| 43 | Label, | ||
| 44 | If, | ||
| 45 | Loop, | ||
| 46 | Break, | ||
| 47 | Return, | ||
| 48 | Kill, | ||
| 49 | Unreachable, | ||
| 50 | Function, | ||
| 51 | Identity, | ||
| 52 | Not, | ||
| 53 | Or, | ||
| 54 | SetVariable, | ||
| 55 | SetIndirectBranchVariable, | ||
| 56 | Variable, | ||
| 57 | IndirectBranchCond, | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool HasChildren(StatementType type) { | ||
| 61 | switch (type) { | ||
| 62 | case StatementType::If: | ||
| 63 | case StatementType::Loop: | ||
| 64 | case StatementType::Function: | ||
| 65 | return true; | ||
| 66 | default: | ||
| 67 | return false; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | struct Goto {}; | ||
| 72 | struct Label {}; | ||
| 73 | struct If {}; | ||
| 74 | struct Loop {}; | ||
| 75 | struct Break {}; | ||
| 76 | struct Return {}; | ||
| 77 | struct Kill {}; | ||
| 78 | struct Unreachable {}; | ||
| 79 | struct FunctionTag {}; | ||
| 80 | struct Identity {}; | ||
| 81 | struct Not {}; | ||
| 82 | struct Or {}; | ||
| 83 | struct SetVariable {}; | ||
| 84 | struct SetIndirectBranchVariable {}; | ||
| 85 | struct Variable {}; | ||
| 86 | struct IndirectBranchCond {}; | ||
| 87 | |||
| 88 | #ifdef _MSC_VER | ||
| 89 | #pragma warning(push) | ||
| 90 | #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement | ||
| 91 | #endif | ||
| 92 | struct Statement : ListBaseHook { | ||
| 93 | Statement(const Flow::Block* block_, Statement* up_) | ||
| 94 | : block{block_}, up{up_}, type{StatementType::Code} {} | ||
| 95 | Statement(Goto, Statement* cond_, Node label_, Statement* up_) | ||
| 96 | : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} | ||
| 97 | Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} | ||
| 98 | Statement(If, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 99 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} | ||
| 100 | Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 101 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} | ||
| 102 | Statement(Break, Statement* cond_, Statement* up_) | ||
| 103 | : cond{cond_}, up{up_}, type{StatementType::Break} {} | ||
| 104 | Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} | ||
| 105 | Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} | ||
| 106 | Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} | ||
| 107 | Statement(FunctionTag) : children{}, type{StatementType::Function} {} | ||
| 108 | Statement(Identity, IR::Condition cond_, Statement* up_) | ||
| 109 | : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} | ||
| 110 | Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} | ||
| 111 | Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) | ||
| 112 | : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} | ||
| 113 | Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) | ||
| 114 | : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} | ||
| 115 | Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) | ||
| 116 | : branch_offset{branch_offset_}, | ||
| 117 | branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} | ||
| 118 | Statement(Variable, u32 id_, Statement* up_) | ||
| 119 | : id{id_}, up{up_}, type{StatementType::Variable} {} | ||
| 120 | Statement(IndirectBranchCond, u32 location_, Statement* up_) | ||
| 121 | : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} | ||
| 122 | |||
| 123 | ~Statement() { | ||
| 124 | if (HasChildren(type)) { | ||
| 125 | std::destroy_at(&children); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | union { | ||
| 130 | const Flow::Block* block; | ||
| 131 | Node label; | ||
| 132 | Tree children; | ||
| 133 | IR::Condition guest_cond; | ||
| 134 | Statement* op; | ||
| 135 | Statement* op_a; | ||
| 136 | u32 location; | ||
| 137 | s32 branch_offset; | ||
| 138 | }; | ||
| 139 | union { | ||
| 140 | Statement* cond; | ||
| 141 | Statement* op_b; | ||
| 142 | u32 id; | ||
| 143 | IR::Reg branch_reg; | ||
| 144 | }; | ||
| 145 | Statement* up{}; | ||
| 146 | StatementType type; | ||
| 147 | }; | ||
| 148 | #ifdef _MSC_VER | ||
| 149 | #pragma warning(pop) | ||
| 150 | #endif | ||
| 151 | |||
| 152 | std::string DumpExpr(const Statement* stmt) { | ||
| 153 | switch (stmt->type) { | ||
| 154 | case StatementType::Identity: | ||
| 155 | return fmt::format("{}", stmt->guest_cond); | ||
| 156 | case StatementType::Not: | ||
| 157 | return fmt::format("!{}", DumpExpr(stmt->op)); | ||
| 158 | case StatementType::Or: | ||
| 159 | return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); | ||
| 160 | case StatementType::Variable: | ||
| 161 | return fmt::format("goto_L{}", stmt->id); | ||
| 162 | case StatementType::IndirectBranchCond: | ||
| 163 | return fmt::format("(indirect_branch == {:x})", stmt->location); | ||
| 164 | default: | ||
| 165 | return "<invalid type>"; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 169 | [[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { | ||
| 170 | std::string ret; | ||
| 171 | std::string indent(indentation, ' '); | ||
| 172 | for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { | ||
| 173 | switch (stmt->type) { | ||
| 174 | case StatementType::Code: | ||
| 175 | ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, | ||
| 176 | stmt->block->begin.Offset(), stmt->block->end.Offset(), | ||
| 177 | reinterpret_cast<uintptr_t>(stmt->block)); | ||
| 178 | break; | ||
| 179 | case StatementType::Goto: | ||
| 180 | ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), | ||
| 181 | stmt->label->id); | ||
| 182 | break; | ||
| 183 | case StatementType::Label: | ||
| 184 | ret += fmt::format("{}L{}:\n", indent, stmt->id); | ||
| 185 | break; | ||
| 186 | case StatementType::If: | ||
| 187 | ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); | ||
| 188 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 189 | ret += fmt::format("{} }}\n", indent); | ||
| 190 | break; | ||
| 191 | case StatementType::Loop: | ||
| 192 | ret += fmt::format("{} do {{\n", indent); | ||
| 193 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 194 | ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); | ||
| 195 | break; | ||
| 196 | case StatementType::Break: | ||
| 197 | ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); | ||
| 198 | break; | ||
| 199 | case StatementType::Return: | ||
| 200 | ret += fmt::format("{} return;\n", indent); | ||
| 201 | break; | ||
| 202 | case StatementType::Kill: | ||
| 203 | ret += fmt::format("{} kill;\n", indent); | ||
| 204 | break; | ||
| 205 | case StatementType::Unreachable: | ||
| 206 | ret += fmt::format("{} unreachable;\n", indent); | ||
| 207 | break; | ||
| 208 | case StatementType::SetVariable: | ||
| 209 | ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); | ||
| 210 | break; | ||
| 211 | case StatementType::SetIndirectBranchVariable: | ||
| 212 | ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, | ||
| 213 | stmt->branch_offset); | ||
| 214 | break; | ||
| 215 | case StatementType::Function: | ||
| 216 | case StatementType::Identity: | ||
| 217 | case StatementType::Not: | ||
| 218 | case StatementType::Or: | ||
| 219 | case StatementType::Variable: | ||
| 220 | case StatementType::IndirectBranchCond: | ||
| 221 | throw LogicError("Statement can't be printed"); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | return ret; | ||
| 225 | } | ||
| 226 | |||
| 227 | void SanitizeNoBreaks(const Tree& tree) { | ||
| 228 | if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { | ||
| 229 | throw NotImplementedException("Capturing statement with break nodes"); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | size_t Level(Node stmt) { | ||
| 234 | size_t level{0}; | ||
| 235 | Statement* node{stmt->up}; | ||
| 236 | while (node) { | ||
| 237 | ++level; | ||
| 238 | node = node->up; | ||
| 239 | } | ||
| 240 | return level; | ||
| 241 | } | ||
| 242 | |||
| 243 | bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 244 | const size_t goto_level{Level(goto_stmt)}; | ||
| 245 | const size_t label_level{Level(label_stmt)}; | ||
| 246 | size_t min_level; | ||
| 247 | size_t max_level; | ||
| 248 | Node min; | ||
| 249 | Node max; | ||
| 250 | if (label_level < goto_level) { | ||
| 251 | min_level = label_level; | ||
| 252 | max_level = goto_level; | ||
| 253 | min = label_stmt; | ||
| 254 | max = goto_stmt; | ||
| 255 | } else { // goto_level <= label_level | ||
| 256 | min_level = goto_level; | ||
| 257 | max_level = label_level; | ||
| 258 | min = goto_stmt; | ||
| 259 | max = label_stmt; | ||
| 260 | } | ||
| 261 | while (max_level > min_level) { | ||
| 262 | --max_level; | ||
| 263 | max = max->up; | ||
| 264 | } | ||
| 265 | return min->up == max->up; | ||
| 266 | } | ||
| 267 | |||
| 268 | bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 269 | return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { | ||
| 273 | Node it{goto_stmt}; | ||
| 274 | do { | ||
| 275 | if (it == label_stmt) { | ||
| 276 | return true; | ||
| 277 | } | ||
| 278 | --it; | ||
| 279 | } while (it != goto_stmt->up->children.begin()); | ||
| 280 | while (it != goto_stmt->up->children.end()) { | ||
| 281 | if (it == label_stmt) { | ||
| 282 | return true; | ||
| 283 | } | ||
| 284 | ++it; | ||
| 285 | } | ||
| 286 | return false; | ||
| 287 | } | ||
| 288 | |||
| 289 | Node SiblingFromNephew(Node uncle, Node nephew) noexcept { | ||
| 290 | Statement* const parent{uncle->up}; | ||
| 291 | Statement* it{&*nephew}; | ||
| 292 | while (it->up != parent) { | ||
| 293 | it = it->up; | ||
| 294 | } | ||
| 295 | return Tree::s_iterator_to(*it); | ||
| 296 | } | ||
| 297 | |||
| 298 | bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { | ||
| 299 | const Node end{right_sibling->up->children.end()}; | ||
| 300 | for (auto it = right_sibling; it != end; ++it) { | ||
| 301 | if (it == left_sibling) { | ||
| 302 | return false; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | return true; | ||
| 306 | } | ||
| 307 | |||
| 308 | bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { | ||
| 309 | const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; | ||
| 310 | return AreOrdered(sibling, goto_stmt); | ||
| 311 | } | ||
| 312 | |||
| 313 | class GotoPass { | ||
| 314 | public: | ||
| 315 | explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { | ||
| 316 | std::vector gotos{BuildTree(cfg)}; | ||
| 317 | const auto end{gotos.rend()}; | ||
| 318 | for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { | ||
| 319 | RemoveGoto(*goto_stmt); | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | Statement& RootStatement() noexcept { | ||
| 324 | return root_stmt; | ||
| 325 | } | ||
| 326 | |||
| 327 | private: | ||
| 328 | void RemoveGoto(Node goto_stmt) { | ||
| 329 | // Force goto_stmt and label_stmt to be directly related | ||
| 330 | const Node label_stmt{goto_stmt->label}; | ||
| 331 | if (IsIndirectlyRelated(goto_stmt, label_stmt)) { | ||
| 332 | // Move goto_stmt out using outward-movement transformation until it becomes | ||
| 333 | // directly related to label_stmt | ||
| 334 | while (!IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 335 | goto_stmt = MoveOutward(goto_stmt); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | // Force goto_stmt and label_stmt to be siblings | ||
| 339 | if (IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 340 | const size_t label_level{Level(label_stmt)}; | ||
| 341 | size_t goto_level{Level(goto_stmt)}; | ||
| 342 | if (goto_level > label_level) { | ||
| 343 | // Move goto_stmt out of its level using outward-movement transformations | ||
| 344 | while (goto_level > label_level) { | ||
| 345 | goto_stmt = MoveOutward(goto_stmt); | ||
| 346 | --goto_level; | ||
| 347 | } | ||
| 348 | } else { // Level(goto_stmt) <= Level(label_stmt) | ||
| 349 | if (NeedsLift(goto_stmt, label_stmt)) { | ||
| 350 | // Lift goto_stmt to above stmt containing label_stmt using goto-lifting | ||
| 351 | // transformations | ||
| 352 | goto_stmt = Lift(goto_stmt); | ||
| 353 | } | ||
| 354 | // Move goto_stmt into label_stmt's level using inward-movement transformation | ||
| 355 | while (goto_level < label_level) { | ||
| 356 | goto_stmt = MoveInward(goto_stmt); | ||
| 357 | ++goto_level; | ||
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | // Expensive operation: | ||
| 362 | // if (!AreSiblings(goto_stmt, label_stmt)) { | ||
| 363 | // throw LogicError("Goto is not a sibling with the label"); | ||
| 364 | // } | ||
| 365 | // goto_stmt and label_stmt are guaranteed to be siblings, eliminate | ||
| 366 | if (std::next(goto_stmt) == label_stmt) { | ||
| 367 | // Simply eliminate the goto if the label is next to it | ||
| 368 | goto_stmt->up->children.erase(goto_stmt); | ||
| 369 | } else if (AreOrdered(goto_stmt, label_stmt)) { | ||
| 370 | // Eliminate goto_stmt with a conditional | ||
| 371 | EliminateAsConditional(goto_stmt, label_stmt); | ||
| 372 | } else { | ||
| 373 | // Eliminate goto_stmt with a loop | ||
| 374 | EliminateAsLoop(goto_stmt, label_stmt); | ||
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | std::vector<Node> BuildTree(Flow::CFG& cfg) { | ||
| 379 | u32 label_id{0}; | ||
| 380 | std::vector<Node> gotos; | ||
| 381 | Flow::Function& first_function{cfg.Functions().front()}; | ||
| 382 | BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); | ||
| 383 | return gotos; | ||
| 384 | } | ||
| 385 | |||
| 386 | void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, | ||
| 387 | std::vector<Node>& gotos, Node function_insert_point, | ||
| 388 | std::optional<Node> return_label) { | ||
| 389 | Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; | ||
| 390 | Tree& root{root_stmt.children}; | ||
| 391 | std::unordered_map<Flow::Block*, Node> local_labels; | ||
| 392 | local_labels.reserve(function.blocks.size()); | ||
| 393 | |||
| 394 | for (Flow::Block& block : function.blocks) { | ||
| 395 | Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; | ||
| 396 | const Node label_it{root.insert(function_insert_point, *label)}; | ||
| 397 | local_labels.emplace(&block, label_it); | ||
| 398 | ++label_id; | ||
| 399 | } | ||
| 400 | for (Flow::Block& block : function.blocks) { | ||
| 401 | const Node label{local_labels.at(&block)}; | ||
| 402 | // Insertion point | ||
| 403 | const Node ip{std::next(label)}; | ||
| 404 | |||
| 405 | // Reset goto variables before the first block and after its respective label | ||
| 406 | const auto make_reset_variable{[&]() -> Statement& { | ||
| 407 | return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); | ||
| 408 | }}; | ||
| 409 | root.push_front(make_reset_variable()); | ||
| 410 | root.insert(ip, make_reset_variable()); | ||
| 411 | root.insert(ip, *pool.Create(&block, &root_stmt)); | ||
| 412 | |||
| 413 | switch (block.end_class) { | ||
| 414 | case Flow::EndClass::Branch: { | ||
| 415 | Statement* const always_cond{ | ||
| 416 | pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; | ||
| 417 | if (block.cond == IR::Condition{true}) { | ||
| 418 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 419 | gotos.push_back( | ||
| 420 | root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); | ||
| 421 | } else if (block.cond == IR::Condition{false}) { | ||
| 422 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 423 | gotos.push_back(root.insert( | ||
| 424 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 425 | } else { | ||
| 426 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 427 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 428 | Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 429 | gotos.push_back( | ||
| 430 | root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); | ||
| 431 | gotos.push_back(root.insert( | ||
| 432 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 433 | } | ||
| 434 | break; | ||
| 435 | } | ||
| 436 | case Flow::EndClass::IndirectBranch: | ||
| 437 | root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, | ||
| 438 | block.branch_offset, &root_stmt)); | ||
| 439 | for (const Flow::IndirectBranch& indirect : block.indirect_branches) { | ||
| 440 | const Node indirect_label{local_labels.at(indirect.block)}; | ||
| 441 | Statement* cond{ | ||
| 442 | pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; | ||
| 443 | Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; | ||
| 444 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 445 | } | ||
| 446 | root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); | ||
| 447 | break; | ||
| 448 | case Flow::EndClass::Call: { | ||
| 449 | Flow::Function& call{cfg.Functions()[block.function_call]}; | ||
| 450 | const Node call_return_label{local_labels.at(block.return_block)}; | ||
| 451 | BuildTree(cfg, call, label_id, gotos, ip, call_return_label); | ||
| 452 | break; | ||
| 453 | } | ||
| 454 | case Flow::EndClass::Exit: | ||
| 455 | root.insert(ip, *pool.Create(Return{}, &root_stmt)); | ||
| 456 | break; | ||
| 457 | case Flow::EndClass::Return: { | ||
| 458 | Statement* const return_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 459 | Statement* const goto_stmt{pool.Create(Goto{}, return_cond, return_label.value(), &root_stmt)}; | ||
| 460 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 461 | break; | ||
| 462 | } | ||
| 463 | case Flow::EndClass::Kill: | ||
| 464 | root.insert(ip, *pool.Create(Kill{}, &root_stmt)); | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | } | ||
| 469 | |||
| 470 | void UpdateTreeUp(Statement* tree) { | ||
| 471 | for (Statement& stmt : tree->children) { | ||
| 472 | stmt.up = tree; | ||
| 473 | } | ||
| 474 | } | ||
| 475 | |||
| 476 | void EliminateAsConditional(Node goto_stmt, Node label_stmt) { | ||
| 477 | Tree& body{goto_stmt->up->children}; | ||
| 478 | Tree if_body; | ||
| 479 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); | ||
| 480 | Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; | ||
| 481 | Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; | ||
| 482 | UpdateTreeUp(if_stmt); | ||
| 483 | body.insert(goto_stmt, *if_stmt); | ||
| 484 | body.erase(goto_stmt); | ||
| 485 | } | ||
| 486 | |||
| 487 | void EliminateAsLoop(Node goto_stmt, Node label_stmt) { | ||
| 488 | Tree& body{goto_stmt->up->children}; | ||
| 489 | Tree loop_body; | ||
| 490 | loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); | ||
| 491 | Statement* const cond{goto_stmt->cond}; | ||
| 492 | Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; | ||
| 493 | UpdateTreeUp(loop); | ||
| 494 | body.insert(goto_stmt, *loop); | ||
| 495 | body.erase(goto_stmt); | ||
| 496 | } | ||
| 497 | |||
| 498 | [[nodiscard]] Node MoveOutward(Node goto_stmt) { | ||
| 499 | switch (goto_stmt->up->type) { | ||
| 500 | case StatementType::If: | ||
| 501 | return MoveOutwardIf(goto_stmt); | ||
| 502 | case StatementType::Loop: | ||
| 503 | return MoveOutwardLoop(goto_stmt); | ||
| 504 | default: | ||
| 505 | throw LogicError("Invalid outward movement"); | ||
| 506 | } | ||
| 507 | } | ||
| 508 | |||
| 509 | [[nodiscard]] Node MoveInward(Node goto_stmt) { | ||
| 510 | Statement* const parent{goto_stmt->up}; | ||
| 511 | Tree& body{parent->children}; | ||
| 512 | const Node label{goto_stmt->label}; | ||
| 513 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 514 | const u32 label_id{label->id}; | ||
| 515 | |||
| 516 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 517 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 518 | body.insert(goto_stmt, *set_var); | ||
| 519 | |||
| 520 | Tree if_body; | ||
| 521 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); | ||
| 522 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 523 | Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; | ||
| 524 | if (!if_body.empty()) { | ||
| 525 | Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; | ||
| 526 | UpdateTreeUp(if_stmt); | ||
| 527 | body.insert(goto_stmt, *if_stmt); | ||
| 528 | } | ||
| 529 | body.erase(goto_stmt); | ||
| 530 | |||
| 531 | switch (label_nested_stmt->type) { | ||
| 532 | case StatementType::If: | ||
| 533 | // Update nested if condition | ||
| 534 | label_nested_stmt->cond = | ||
| 535 | pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); | ||
| 536 | break; | ||
| 537 | case StatementType::Loop: | ||
| 538 | break; | ||
| 539 | default: | ||
| 540 | throw LogicError("Invalid inward movement"); | ||
| 541 | } | ||
| 542 | Tree& nested_tree{label_nested_stmt->children}; | ||
| 543 | Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; | ||
| 544 | return nested_tree.insert(nested_tree.begin(), *new_goto); | ||
| 545 | } | ||
| 546 | |||
| 547 | [[nodiscard]] Node Lift(Node goto_stmt) { | ||
| 548 | Statement* const parent{goto_stmt->up}; | ||
| 549 | Tree& body{parent->children}; | ||
| 550 | const Node label{goto_stmt->label}; | ||
| 551 | const u32 label_id{label->id}; | ||
| 552 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 553 | |||
| 554 | Tree loop_body; | ||
| 555 | loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); | ||
| 556 | SanitizeNoBreaks(loop_body); | ||
| 557 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 558 | Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; | ||
| 559 | UpdateTreeUp(loop_stmt); | ||
| 560 | body.insert(goto_stmt, *loop_stmt); | ||
| 561 | |||
| 562 | Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; | ||
| 563 | loop_stmt->children.push_front(*new_goto); | ||
| 564 | const Node new_goto_node{loop_stmt->children.begin()}; | ||
| 565 | |||
| 566 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; | ||
| 567 | loop_stmt->children.push_back(*set_var); | ||
| 568 | |||
| 569 | body.erase(goto_stmt); | ||
| 570 | return new_goto_node; | ||
| 571 | } | ||
| 572 | |||
| 573 | Node MoveOutwardIf(Node goto_stmt) { | ||
| 574 | const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 575 | Tree& body{parent->children}; | ||
| 576 | const u32 label_id{goto_stmt->label->id}; | ||
| 577 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 578 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; | ||
| 579 | body.insert(goto_stmt, *set_goto_var); | ||
| 580 | |||
| 581 | Tree if_body; | ||
| 582 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); | ||
| 583 | if_body.pop_front(); | ||
| 584 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 585 | Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; | ||
| 586 | Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; | ||
| 587 | UpdateTreeUp(if_stmt); | ||
| 588 | body.insert(goto_stmt, *if_stmt); | ||
| 589 | |||
| 590 | body.erase(goto_stmt); | ||
| 591 | |||
| 592 | Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 593 | Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; | ||
| 594 | Tree& parent_tree{parent->up->children}; | ||
| 595 | return parent_tree.insert(std::next(parent), *new_goto); | ||
| 596 | } | ||
| 597 | |||
| 598 | Node MoveOutwardLoop(Node goto_stmt) { | ||
| 599 | Statement* const parent{goto_stmt->up}; | ||
| 600 | Tree& body{parent->children}; | ||
| 601 | const u32 label_id{goto_stmt->label->id}; | ||
| 602 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 603 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 604 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 605 | Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; | ||
| 606 | body.insert(goto_stmt, *set_goto_var); | ||
| 607 | body.insert(goto_stmt, *break_stmt); | ||
| 608 | body.erase(goto_stmt); | ||
| 609 | |||
| 610 | const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 611 | Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 612 | Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; | ||
| 613 | Tree& parent_tree{loop->up->children}; | ||
| 614 | return parent_tree.insert(std::next(loop), *new_goto); | ||
| 615 | } | ||
| 616 | |||
| 617 | ObjectPool<Statement>& pool; | ||
| 618 | Statement root_stmt{FunctionTag{}}; | ||
| 619 | }; | ||
| 620 | |||
| 621 | [[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { | ||
| 622 | Tree& tree{stmt.up->children}; | ||
| 623 | const Node end{tree.end()}; | ||
| 624 | Node forward_node{std::next(Tree::s_iterator_to(stmt))}; | ||
| 625 | while (forward_node != end && !HasChildren(forward_node->type)) { | ||
| 626 | if (forward_node->type == StatementType::Code) { | ||
| 627 | return &*forward_node; | ||
| 628 | } | ||
| 629 | ++forward_node; | ||
| 630 | } | ||
| 631 | return nullptr; | ||
| 632 | } | ||
| 633 | |||
| 634 | [[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { | ||
| 635 | switch (stmt.type) { | ||
| 636 | case StatementType::Identity: | ||
| 637 | return ir.Condition(stmt.guest_cond); | ||
| 638 | case StatementType::Not: | ||
| 639 | return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); | ||
| 640 | case StatementType::Or: | ||
| 641 | return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); | ||
| 642 | case StatementType::Variable: | ||
| 643 | return ir.GetGotoVariable(stmt.id); | ||
| 644 | case StatementType::IndirectBranchCond: | ||
| 645 | return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); | ||
| 646 | default: | ||
| 647 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 648 | } | ||
| 649 | } | ||
| 650 | |||
| 651 | class TranslatePass { | ||
| 652 | public: | ||
| 653 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | ||
| 654 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | ||
| 655 | IR::AbstractSyntaxList& syntax_list_) | ||
| 656 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | ||
| 657 | syntax_list{syntax_list_} { | ||
| 658 | Visit(root_stmt, nullptr, nullptr); | ||
| 659 | |||
| 660 | IR::Block& first_block{*syntax_list.front().data.block}; | ||
| 661 | IR::IREmitter ir(first_block, first_block.begin()); | ||
| 662 | ir.Prologue(); | ||
| 663 | } | ||
| 664 | |||
| 665 | private: | ||
| 666 | void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { | ||
| 667 | IR::Block* current_block{}; | ||
| 668 | const auto ensure_block{[&] { | ||
| 669 | if (current_block) { | ||
| 670 | return; | ||
| 671 | } | ||
| 672 | current_block = block_pool.Create(inst_pool); | ||
| 673 | auto& node{syntax_list.emplace_back()}; | ||
| 674 | node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 675 | node.data.block = current_block; | ||
| 676 | }}; | ||
| 677 | Tree& tree{parent.children}; | ||
| 678 | for (auto it = tree.begin(); it != tree.end(); ++it) { | ||
| 679 | Statement& stmt{*it}; | ||
| 680 | switch (stmt.type) { | ||
| 681 | case StatementType::Label: | ||
| 682 | // Labels can be ignored | ||
| 683 | break; | ||
| 684 | case StatementType::Code: { | ||
| 685 | ensure_block(); | ||
| 686 | Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | case StatementType::SetVariable: { | ||
| 690 | ensure_block(); | ||
| 691 | IR::IREmitter ir{*current_block}; | ||
| 692 | ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | case StatementType::SetIndirectBranchVariable: { | ||
| 696 | ensure_block(); | ||
| 697 | IR::IREmitter ir{*current_block}; | ||
| 698 | IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; | ||
| 699 | ir.SetIndirectBranchVariable(address); | ||
| 700 | break; | ||
| 701 | } | ||
| 702 | case StatementType::If: { | ||
| 703 | ensure_block(); | ||
| 704 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 705 | |||
| 706 | // Implement if header block | ||
| 707 | IR::IREmitter ir{*current_block}; | ||
| 708 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 709 | |||
| 710 | const size_t if_node_index{syntax_list.size()}; | ||
| 711 | syntax_list.emplace_back(); | ||
| 712 | |||
| 713 | // Visit children | ||
| 714 | const size_t then_block_index{syntax_list.size()}; | ||
| 715 | Visit(stmt, break_block, merge_block); | ||
| 716 | |||
| 717 | IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; | ||
| 718 | current_block->AddBranch(then_block); | ||
| 719 | current_block->AddBranch(merge_block); | ||
| 720 | current_block = merge_block; | ||
| 721 | |||
| 722 | auto& if_node{syntax_list[if_node_index]}; | ||
| 723 | if_node.type = IR::AbstractSyntaxNode::Type::If; | ||
| 724 | if_node.data.if_node.cond = cond; | ||
| 725 | if_node.data.if_node.body = then_block; | ||
| 726 | if_node.data.if_node.merge = merge_block; | ||
| 727 | |||
| 728 | auto& endif_node{syntax_list.emplace_back()}; | ||
| 729 | endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; | ||
| 730 | endif_node.data.end_if.merge = merge_block; | ||
| 731 | |||
| 732 | auto& merge{syntax_list.emplace_back()}; | ||
| 733 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 734 | merge.data.block = merge_block; | ||
| 735 | break; | ||
| 736 | } | ||
| 737 | case StatementType::Loop: { | ||
| 738 | IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; | ||
| 739 | if (current_block) { | ||
| 740 | current_block->AddBranch(loop_header_block); | ||
| 741 | } | ||
| 742 | auto& header_node{syntax_list.emplace_back()}; | ||
| 743 | header_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 744 | header_node.data.block = loop_header_block; | ||
| 745 | |||
| 746 | IR::Block* const continue_block{block_pool.Create(inst_pool)}; | ||
| 747 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 748 | |||
| 749 | const size_t loop_node_index{syntax_list.size()}; | ||
| 750 | syntax_list.emplace_back(); | ||
| 751 | |||
| 752 | // Visit children | ||
| 753 | const size_t body_block_index{syntax_list.size()}; | ||
| 754 | Visit(stmt, merge_block, continue_block); | ||
| 755 | |||
| 756 | // The continue block is located at the end of the loop | ||
| 757 | IR::IREmitter ir{*continue_block}; | ||
| 758 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 759 | |||
| 760 | IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; | ||
| 761 | loop_header_block->AddBranch(body_block); | ||
| 762 | |||
| 763 | continue_block->AddBranch(loop_header_block); | ||
| 764 | continue_block->AddBranch(merge_block); | ||
| 765 | |||
| 766 | current_block = merge_block; | ||
| 767 | |||
| 768 | auto& loop{syntax_list[loop_node_index]}; | ||
| 769 | loop.type = IR::AbstractSyntaxNode::Type::Loop; | ||
| 770 | loop.data.loop.body = body_block; | ||
| 771 | loop.data.loop.continue_block = continue_block; | ||
| 772 | loop.data.loop.merge = merge_block; | ||
| 773 | |||
| 774 | auto& continue_block_node{syntax_list.emplace_back()}; | ||
| 775 | continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 776 | continue_block_node.data.block = continue_block; | ||
| 777 | |||
| 778 | auto& repeat{syntax_list.emplace_back()}; | ||
| 779 | repeat.type = IR::AbstractSyntaxNode::Type::Repeat; | ||
| 780 | repeat.data.repeat.cond = cond; | ||
| 781 | repeat.data.repeat.loop_header = loop_header_block; | ||
| 782 | repeat.data.repeat.merge = merge_block; | ||
| 783 | |||
| 784 | auto& merge{syntax_list.emplace_back()}; | ||
| 785 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 786 | merge.data.block = merge_block; | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | case StatementType::Break: { | ||
| 790 | ensure_block(); | ||
| 791 | IR::Block* const skip_block{MergeBlock(parent, stmt)}; | ||
| 792 | |||
| 793 | IR::IREmitter ir{*current_block}; | ||
| 794 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 795 | current_block->AddBranch(break_block); | ||
| 796 | current_block->AddBranch(skip_block); | ||
| 797 | current_block = skip_block; | ||
| 798 | |||
| 799 | auto& break_node{syntax_list.emplace_back()}; | ||
| 800 | break_node.type = IR::AbstractSyntaxNode::Type::Break; | ||
| 801 | break_node.data.break_node.cond = cond; | ||
| 802 | break_node.data.break_node.merge = break_block; | ||
| 803 | break_node.data.break_node.skip = skip_block; | ||
| 804 | |||
| 805 | auto& merge{syntax_list.emplace_back()}; | ||
| 806 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 807 | merge.data.block = skip_block; | ||
| 808 | break; | ||
| 809 | } | ||
| 810 | case StatementType::Return: { | ||
| 811 | ensure_block(); | ||
| 812 | IR::IREmitter{*current_block}.Epilogue(); | ||
| 813 | current_block = nullptr; | ||
| 814 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | ||
| 815 | break; | ||
| 816 | } | ||
| 817 | case StatementType::Kill: { | ||
| 818 | ensure_block(); | ||
| 819 | IR::Block* demote_block{MergeBlock(parent, stmt)}; | ||
| 820 | IR::IREmitter{*current_block}.DemoteToHelperInvocation(); | ||
| 821 | current_block->AddBranch(demote_block); | ||
| 822 | current_block = demote_block; | ||
| 823 | |||
| 824 | auto& merge{syntax_list.emplace_back()}; | ||
| 825 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 826 | merge.data.block = demote_block; | ||
| 827 | break; | ||
| 828 | } | ||
| 829 | case StatementType::Unreachable: { | ||
| 830 | ensure_block(); | ||
| 831 | current_block = nullptr; | ||
| 832 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 833 | break; | ||
| 834 | } | ||
| 835 | default: | ||
| 836 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 837 | } | ||
| 838 | } | ||
| 839 | if (current_block) { | ||
| 840 | if (fallthrough_block) { | ||
| 841 | current_block->AddBranch(fallthrough_block); | ||
| 842 | } else { | ||
| 843 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 844 | } | ||
| 845 | } | ||
| 846 | } | ||
| 847 | |||
| 848 | IR::Block* MergeBlock(Statement& parent, Statement& stmt) { | ||
| 849 | Statement* merge_stmt{TryFindForwardBlock(stmt)}; | ||
| 850 | if (!merge_stmt) { | ||
| 851 | // Insert a dummy code statement we can visit later and use as the merge point | ||
| 852 | merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); | ||
| 853 | parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); | ||
| 854 | } | ||
| 855 | return block_pool.Create(inst_pool); | ||
| 856 | } | ||
| 857 | |||
| 858 | ObjectPool<Statement>& stmt_pool; | ||
| 859 | ObjectPool<IR::Inst>& inst_pool; | ||
| 860 | ObjectPool<IR::Block>& block_pool; | ||
| 861 | Environment& env; | ||
| 862 | IR::AbstractSyntaxList& syntax_list; | ||
| 863 | |||
| 864 | // TODO: C++20 Remove this when all compilers support constexpr std::vector | ||
| 865 | #if __cpp_lib_constexpr_vector >= 201907 | ||
| 866 | static constexpr Flow::Block dummy_flow_block; | ||
| 867 | #else | ||
| 868 | const Flow::Block dummy_flow_block; | ||
| 869 | #endif | ||
| 870 | }; | ||
| 871 | } // Anonymous namespace | ||
| 872 | |||
| 873 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 874 | Environment& env, Flow::CFG& cfg) { | ||
| 875 | ObjectPool<Statement> stmt_pool{64}; | ||
| 876 | GotoPass goto_pass{cfg, stmt_pool}; | ||
| 877 | Statement& root{goto_pass.RootStatement()}; | ||
| 878 | IR::AbstractSyntaxList syntax_list; | ||
| 879 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | ||
| 880 | return syntax_list; | ||
| 881 | } | ||
| 882 | |||
| 883 | } // namespace Shader::Maxwell | ||
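The pass above follows the classic goto-elimination recipe: each goto is moved outward, moved inward, or lifted until it is a sibling of its label, then replaced with structured control flow. A minimal standalone sketch of the forward sibling case, using a toy statement type in place of the intrusive Statement tree (Stmt and EliminateForwardGoto are hypothetical stand-ins for Statement and EliminateAsConditional):

    #include <iostream>
    #include <iterator>
    #include <list>
    #include <string>

    // Toy statement: a leaf, or an "if (text) { body }" node.
    struct Stmt {
        std::string text;
        std::list<Stmt> body;
    };

    // Stand-in for EliminateAsConditional: wrap everything between a forward
    // goto and its label in "if (!cond)", then drop the goto. Execution order
    // is preserved; only the goto disappears.
    void EliminateForwardGoto(std::list<Stmt>& stmts, std::list<Stmt>::iterator goto_it,
                              std::list<Stmt>::iterator label_it, const std::string& cond) {
        Stmt if_stmt{"!(" + cond + ")", {}};
        if_stmt.body.splice(if_stmt.body.begin(), stmts, std::next(goto_it), label_it);
        stmts.insert(goto_it, std::move(if_stmt));
        stmts.erase(goto_it);
    }

    int main() {
        // A; if (p) goto L; B; L: C;
        std::list<Stmt> stmts{{"A", {}}, {"goto", {}}, {"B", {}}, {"L:", {}}, {"C", {}}};
        EliminateForwardGoto(stmts, std::next(stmts.begin()), std::prev(stmts.end(), 2), "p");
        for (const Stmt& stmt : stmts) {
            if (stmt.body.empty()) {
                std::cout << stmt.text << '\n';
            } else {
                // Simplified printer: the demo body holds a single statement
                std::cout << "if (" << stmt.text << ") { " << stmt.body.front().text << " }\n";
            }
        }
        // Output: A / if (!(p)) { B } / L: / C
    }

The backward case, where the label precedes the goto, corresponds to the do/while rewrite performed by EliminateAsLoop above.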
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..88b083649 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg); | ||
| 19 | |||
| 20 | } // namespace Shader::Maxwell | ||
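For orientation, a call site would look roughly like this (a sketch, not runnable on its own: env must be a concrete Environment implementation and cfg a control flow graph produced by the frontend):

    ObjectPool<IR::Inst> inst_pool;
    ObjectPool<IR::Block> block_pool;
    IR::AbstractSyntaxList asl{BuildASL(inst_pool, block_pool, env, cfg)};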
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction = false, | ||
| 63 | .rounding = IR::FpRounding::RN, | ||
| 64 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction = false, | ||
| 68 | .rounding = IR::FpRounding::RN, | ||
| 69 | .fmz_mode = IR::FmzMode::FTZ, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the address is an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, | ||
| 159 | AtomSize size, AtomOp op) { | ||
| 160 | switch (size) { | ||
| 161 | case AtomSize::U32: | ||
| 162 | case AtomSize::S32: | ||
| 163 | return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); | ||
| 164 | case AtomSize::U64: | ||
| 165 | case AtomSize::S64: | ||
| 166 | return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); | ||
| 167 | case AtomSize::F32: | ||
| 168 | return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); | ||
| 169 | case AtomSize::F16x2: { | ||
| 170 | return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); | ||
| 171 | } | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("Atom Size {}", size); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, | ||
| 178 | const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { | ||
| 179 | IR::Value result; | ||
| 180 | if (AtomOpNotApplicable(size, op)) { | ||
| 181 | result = LoadGlobal(v.ir, offset, size); | ||
| 182 | } else { | ||
| 183 | result = ApplyAtomOp(v, operand_reg, offset, size, op); | ||
| 184 | } | ||
| 185 | if (write_dest) { | ||
| 186 | StoreResult(v, dest_reg, result, size); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | } // Anonymous namespace | ||
| 190 | |||
| 191 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 192 | union { | ||
| 193 | u64 raw; | ||
| 194 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 195 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<49, 3, AtomSize> size; | ||
| 197 | BitField<52, 4, AtomOp> op; | ||
| 198 | } const atom{insn}; | ||
| 199 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 200 | GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void TranslatorVisitor::RED(u64 insn) { | ||
| 204 | union { | ||
| 205 | u64 raw; | ||
| 206 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 207 | BitField<20, 3, AtomSize> size; | ||
| 208 | BitField<23, 3, AtomOp> op; | ||
| 209 | } const red{insn}; | ||
| 210 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 211 | GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
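For readers without the BitField semantics in mind, the addressing rule in AtomOffset can be restated in plain C++ (a sketch; AtomAddress and DecodeAtomAddress are hypothetical names, and register index 255 encodes RZ):

    #include <cstdint>

    struct AtomAddress {
        uint8_t addr_reg;
        bool wide;       // the e bit: the base register pair holds a 64-bit address
        int64_t offset;
    };

    constexpr AtomAddress DecodeAtomAddress(uint64_t insn) {
        const auto addr_reg = static_cast<uint8_t>((insn >> 8) & 0xff);
        const bool wide = ((insn >> 48) & 1) != 0;
        const uint64_t raw = (insn >> 28) & 0xfffff;  // 20-bit immediate
        // RZ has no base value, so the immediate is an absolute, zero-extended
        // address; with a live base register it is a sign-extended displacement.
        const int64_t offset = addr_reg == 255 ? static_cast<int64_t>(raw)
                                               : static_cast<int64_t>(raw << 44) >> 44;
        return AtomAddress{addr_reg, wide, offset};
    }

    static_assert(DecodeAtomAddress((uint64_t{0xff} << 8) | (uint64_t{0xfffff} << 28)).offset == 0xfffff);
    static_assert(DecodeAtomAddress(uint64_t{0xfffff} << 28).offset == -1);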
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
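The shared-memory offset follows the same pattern, except the 22-bit field counts words and is scaled by 4 into bytes on both paths (a sketch; DecodeAtomsImmediate is a hypothetical name):

    #include <cstdint>

    constexpr int64_t DecodeAtomsImmediate(uint64_t insn, bool offset_reg_is_rz) {
        const uint64_t raw = (insn >> 30) & 0x3fffff;  // 22-bit word offset
        if (offset_reg_is_rz) {
            return static_cast<int64_t>(raw << 2);  // absolute byte offset
        }
        return (static_cast<int64_t>(raw << 42) >> 42) * 4;  // signed displacement in bytes
    }

    static_assert(DecodeAtomsImmediate(uint64_t{1} << 30, true) == 4);
    static_assert(DecodeAtomsImmediate(uint64_t{0x3fffff} << 30, false) == -4);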
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | enum class BitSize : u64 { | ||
| 13 | B32, | ||
| 14 | B64, | ||
| 15 | B96, | ||
| 16 | B128, | ||
| 17 | }; | ||
| 18 | |||
| 19 | void TranslatorVisitor::AL2P(u64 inst) { | ||
| 20 | union { | ||
| 21 | u64 raw; | ||
| 22 | BitField<0, 8, IR::Reg> result_register; | ||
| 23 | BitField<8, 8, IR::Reg> indexing_register; | ||
| 24 | BitField<20, 11, s64> offset; | ||
| 25 | BitField<47, 2, BitSize> bitsize; | ||
| 26 | } al2p{inst}; | ||
| 27 | if (al2p.bitsize != BitSize::B32) { | ||
| 28 | throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); | ||
| 29 | } | ||
| 30 | const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; | ||
| 31 | const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; | ||
| 32 | X(al2p.result_register, result); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Shader::Maxwell | ||
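The BitField<20, 11, s64> member reads the offset as an 11-bit two's-complement value; a quick standard-C++ model of that sign extension (SignExtend11 is a hypothetical helper):

    #include <cstdint>

    constexpr int64_t SignExtend11(uint64_t raw) {
        return static_cast<int64_t>(raw << 53) >> 53;  // keep 11 bits, replicate bit 10
    }

    static_assert(SignExtend11(0x7ff) == -1);     // all ones reads back as -1
    static_assert(SignExtend11(0x400) == -1024);  // bit 10 is the sign bit
    static_assert(SignExtend11(0x3ff) == 1023);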
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | // Scope names seem to follow CUDA terminology (a CTA maps to a workgroup). | ||
| 14 | enum class LocalScope : u64 { | ||
| 15 | CTA, | ||
| 16 | GL, | ||
| 17 | SYS, | ||
| 18 | VC, | ||
| 19 | }; | ||
| 20 | } // Anonymous namespace | ||
| 21 | |||
| 22 | void TranslatorVisitor::MEMBAR(u64 inst) { | ||
| 23 | union { | ||
| 24 | u64 raw; | ||
| 25 | BitField<8, 2, LocalScope> scope; | ||
| 26 | } const membar{inst}; | ||
| 27 | |||
| 28 | if (membar.scope == LocalScope::CTA) { | ||
| 29 | ir.WorkgroupMemoryBarrier(); | ||
| 30 | } else { | ||
| 31 | ir.DeviceMemoryBarrier(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::DEPBAR() { | ||
| 36 | // DEPBAR waits on dependency scoreboards, which this IR does not model, so it is a no-op | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::BAR(u64 insn) { | ||
| 40 | enum class Mode { | ||
| 41 | RedPopc, | ||
| 42 | Scan, | ||
| 43 | RedAnd, | ||
| 44 | RedOr, | ||
| 45 | Sync, | ||
| 46 | Arrive, | ||
| 47 | }; | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<43, 1, u64> is_a_imm; | ||
| 51 | BitField<44, 1, u64> is_b_imm; | ||
| 52 | BitField<8, 8, u64> imm_a; | ||
| 53 | BitField<20, 12, u64> imm_b; | ||
| 54 | BitField<42, 1, u64> neg_pred; | ||
| 55 | BitField<39, 3, IR::Pred> pred; | ||
| 56 | } const bar{insn}; | ||
| 57 | |||
| 58 | const Mode mode{[insn] { | ||
| 59 | switch (insn & 0x0000009B00000000ULL) { | ||
| 60 | case 0x0000000200000000ULL: | ||
| 61 | return Mode::RedPopc; | ||
| 62 | case 0x0000000300000000ULL: | ||
| 63 | return Mode::Scan; | ||
| 64 | case 0x0000000A00000000ULL: | ||
| 65 | return Mode::RedAnd; | ||
| 66 | case 0x0000001200000000ULL: | ||
| 67 | return Mode::RedOr; | ||
| 68 | case 0x0000008000000000ULL: | ||
| 69 | return Mode::Sync; | ||
| 70 | case 0x0000008100000000ULL: | ||
| 71 | return Mode::Arrive; | ||
| 72 | } | ||
| 73 | throw NotImplementedException("Invalid encoding"); | ||
| 74 | }()}; | ||
| 75 | if (mode != Mode::Sync) { | ||
| 76 | throw NotImplementedException("BAR mode {}", mode); | ||
| 77 | } | ||
| 78 | if (bar.is_a_imm == 0) { | ||
| 79 | throw NotImplementedException("Non-immediate input A"); | ||
| 80 | } | ||
| 81 | if (bar.imm_a != 0) { | ||
| 82 | throw NotImplementedException("Non-zero input A"); | ||
| 83 | } | ||
| 84 | if (bar.is_b_imm == 0) { | ||
| 85 | throw NotImplementedException("Non-immediate input B"); | ||
| 86 | } | ||
| 87 | if (bar.imm_b != 0) { | ||
| 88 | throw NotImplementedException("Non-zero input B"); | ||
| 89 | } | ||
| 90 | if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { | ||
| 91 | throw NotImplementedException("Non-true input predicate"); | ||
| 92 | } | ||
| 93 | ir.Barrier(); | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Shader::Maxwell | ||
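The BAR mode is scattered across non-contiguous opcode bits, which is why the lambda above matches masked constants instead of declaring a single BitField. The one supported case, restated in isolation (IsBarSync is a hypothetical name):

    #include <cstdint>

    // Bits 32, 33, 35, 36 and 39 select the mode; BAR.SYNC is bit 39 alone.
    constexpr bool IsBarSync(uint64_t insn) {
        return (insn & 0x0000009B00000000ULL) == 0x0000008000000000ULL;
    }

    static_assert(IsBarSync(0x0000008000000000ULL));
    static_assert(!IsBarSync(0x0000008100000000ULL));  // Arrive, not Sync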
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 16 | BitField<40, 1, u64> brev; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const bfe{insn}; | ||
| 20 | |||
| 21 | const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 23 | |||
| 24 | // Common constants | ||
| 25 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 26 | const IR::U32 one{v.ir.Imm32(1)}; | ||
| 27 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 28 | // Edge case conditions | ||
| 29 | const IR::U1 zero_count{v.ir.IEqual(count, zero)}; | ||
| 30 | const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; | ||
| 31 | const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 32 | |||
| 33 | IR::U32 base{v.X(bfe.offset_reg)}; | ||
| 34 | if (bfe.brev != 0) { | ||
| 35 | base = v.ir.BitReverse(base); | ||
| 36 | } | ||
| 37 | IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; | ||
| 38 | if (bfe.is_signed != 0) { | ||
| 39 | const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; | ||
| 40 | const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 41 | const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; | ||
| 42 | // Replicate condition | ||
| 43 | result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; | ||
| 44 | // Exceeding condition | ||
| 45 | const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; | ||
| 46 | result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; | ||
| 47 | } | ||
| 48 | // Zero count condition | ||
| 49 | result = IR::U32{v.ir.Select(zero_count, zero, result)}; | ||
| 50 | |||
| 51 | v.X(bfe.dest_reg, result); | ||
| 52 | |||
| 53 | if (bfe.cc != 0) { | ||
| 54 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 55 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 56 | v.ResetCFlag(); | ||
| 57 | v.ResetOFlag(); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | } // Anonymous namespace | ||
| 61 | |||
| 62 | void TranslatorVisitor::BFE_reg(u64 insn) { | ||
| 63 | BFE(*this, insn, GetReg20(insn)); | ||
| 64 | } | ||
| 65 | |||
| 66 | void TranslatorVisitor::BFE_cbuf(u64 insn) { | ||
| 67 | BFE(*this, insn, GetCbuf(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::BFE_imm(u64 insn) { | ||
| 71 | BFE(*this, insn, GetImm20(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | } // namespace Shader::Maxwell | ||
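As the two leading BitFieldExtract calls show, BFE takes its field description packed into the source operand: the bit offset in byte 0 and the bit count in byte 1. A tiny illustration (MakeBfeDescriptor is a hypothetical helper):

    #include <cstdint>

    constexpr uint32_t MakeBfeDescriptor(uint32_t offset, uint32_t count) {
        return (count << 8) | offset;
    }

    // Extracting a 5-bit field starting at bit 3 takes a source operand of 0x503
    static_assert(MakeBfeDescriptor(3, 5) == 0x503);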
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> insert_reg; | ||
| 16 | BitField<47, 1, u64> cc; | ||
| 17 | } const bfi{insn}; | ||
| 18 | |||
| 19 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 20 | const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; | ||
| 21 | const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 23 | |||
| 24 | // Edge case conditions | ||
| 25 | const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 26 | const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; | ||
| 27 | |||
| 28 | const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; | ||
| 29 | const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; | ||
| 30 | |||
| 31 | const IR::U32 insert{v.X(bfi.insert_reg)}; | ||
| 32 | IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; | ||
| 33 | |||
| 34 | result = IR::U32{v.ir.Select(exceed_offset, base, result)}; | ||
| 35 | |||
| 36 | v.X(bfi.dest_reg, result); | ||
| 37 | if (bfi.cc != 0) { | ||
| 38 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 39 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 40 | v.ResetCFlag(); | ||
| 41 | v.ResetOFlag(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::BFI_reg(u64 insn) { | ||
| 47 | BFI(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::BFI_rc(u64 insn) { | ||
| 51 | BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::BFI_cr(u64 insn) { | ||
| 55 | BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::BFI_imm(u64 insn) { | ||
| 59 | BFI(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
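The two Selects above implement two guards: an insert starting at or past bit 32 leaves the base untouched, and a count greater than 32 is clamped to the bits remaining above the offset. The clamp as scalar code (SafeBfiCount is a hypothetical helper):

    #include <cstdint>

    constexpr uint32_t SafeBfiCount(uint32_t offset, uint32_t count) {
        const uint32_t remaining = 32 - offset;  // remaining_size above
        return count > 32 ? remaining : count;   // the exceed_count Select above
    }

    static_assert(SafeBfiCount(24, 40) == 8);   // oversized count is clamped
    static_assert(SafeBfiCount(24, 16) == 16);  // in-range counts pass through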
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void Check(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<5, 1, u64> cbuf_mode; | ||
| 16 | BitField<6, 1, u64> lmt; | ||
| 17 | } const encoding{insn}; | ||
| 18 | |||
| 19 | if (encoding.cbuf_mode != 0) { | ||
| 20 | throw NotImplementedException("Constant buffer mode"); | ||
| 21 | } | ||
| 22 | if (encoding.lmt != 0) { | ||
| 23 | throw NotImplementedException("LMT"); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::BRX(u64 insn) { | ||
| 29 | Check(insn); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::JMX(u64 insn) { | ||
| 33 | Check(insn); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
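| 14 | // IEEE-754 rounding modes: RN rounds to nearest even, RM toward -infinity, | ||
| 15 | // RP toward +infinity, and RZ toward zero. | ||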
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | // FMZ is manually handled in the instruction | ||
| 50 | return IR::FmzMode::FTZ; | ||
| 51 | case FmzMode::INVALIDFMZ3: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 9 | CompareOp compare_op, bool is_signed) { | ||
| 10 | switch (compare_op) { | ||
| 11 | case CompareOp::False: | ||
| 12 | return ir.Imm1(false); | ||
| 13 | case CompareOp::LessThan: | ||
| 14 | return ir.ILessThan(operand_1, operand_2, is_signed); | ||
| 15 | case CompareOp::Equal: | ||
| 16 | return ir.IEqual(operand_1, operand_2); | ||
| 17 | case CompareOp::LessThanEqual: | ||
| 18 | return ir.ILessThanEqual(operand_1, operand_2, is_signed); | ||
| 19 | case CompareOp::GreaterThan: | ||
| 20 | return ir.IGreaterThan(operand_1, operand_2, is_signed); | ||
| 21 | case CompareOp::NotEqual: | ||
| 22 | return ir.INotEqual(operand_1, operand_2); | ||
| 23 | case CompareOp::GreaterThanEqual: | ||
| 24 | return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); | ||
| 25 | case CompareOp::True: | ||
| 26 | return ir.Imm1(true); | ||
| 27 | default: | ||
| 28 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 33 | CompareOp compare_op, bool is_signed) { | ||
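| 34 | // Extended (multi-word) compares fold in the carry and zero flags left by the | ||
| 35 | // previous lower-word operation: operand_1 + ~operand_2 + carry reproduces the | ||
| 36 | // upper-word subtraction, and the Z flag accounts for the lower words. | ||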
| 34 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 35 | const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; | ||
| 36 | const IR::U1 z_flag{ir.GetZFlag()}; | ||
| 37 | const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; | ||
| 38 | const IR::U1 flip_logic{is_signed ? ir.Imm1(false) | ||
| 39 | : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), | ||
| 40 | ir.ILessThan(operand_2, zero, true))}; | ||
| 41 | switch (compare_op) { | ||
| 42 | case CompareOp::False: | ||
| 43 | return ir.Imm1(false); | ||
| 44 | case CompareOp::LessThan: | ||
| 45 | return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 46 | ir.ILessThan(intermediate, zero, true))}; | ||
| 47 | case CompareOp::Equal: | ||
| 48 | return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); | ||
| 49 | case CompareOp::LessThanEqual: { | ||
| 50 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 51 | ir.ILessThan(intermediate, zero, true))}; | ||
| 52 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 53 | } | ||
| 54 | case CompareOp::GreaterThan: { | ||
| 55 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), | ||
| 56 | ir.IGreaterThan(intermediate, zero, true))}; | ||
| 57 | const IR::U1 not_z{ir.LogicalNot(z_flag)}; | ||
| 58 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); | ||
| 59 | } | ||
| 60 | case CompareOp::NotEqual: | ||
| 61 | return ir.LogicalOr(ir.INotEqual(intermediate, zero), | ||
| 62 | ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); | ||
| 63 | case CompareOp::GreaterThanEqual: { | ||
| 64 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), | ||
| 65 | ir.IGreaterThanEqual(intermediate, zero, true))}; | ||
| 66 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 67 | } | ||
| 68 | case CompareOp::True: | ||
| 69 | return ir.Imm1(true); | ||
| 70 | default: | ||
| 71 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, | ||
| 76 | BooleanOp bop) { | ||
| 77 | switch (bop) { | ||
| 78 | case BooleanOp::AND: | ||
| 79 | return ir.LogicalAnd(predicate_1, predicate_2); | ||
| 80 | case BooleanOp::OR: | ||
| 81 | return ir.LogicalOr(predicate_1, predicate_2); | ||
| 82 | case BooleanOp::XOR: | ||
| 83 | return ir.LogicalXor(predicate_1, predicate_2); | ||
| 84 | default: | ||
| 85 | throw NotImplementedException("Invalid bop {}", bop); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { | ||
| 90 | switch (op) { | ||
| 91 | case PredicateOp::False: | ||
| 92 | return ir.Imm1(false); | ||
| 93 | case PredicateOp::True: | ||
| 94 | return ir.Imm1(true); | ||
| 95 | case PredicateOp::Zero: | ||
| 96 | return ir.IEqual(result, ir.Imm32(0)); | ||
| 97 | case PredicateOp::NonZero: | ||
| 98 | return ir.INotEqual(result, ir.Imm32(0)); | ||
| 99 | default: | ||
| 100 | throw NotImplementedException("Invalid Predicate operation {}", op); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
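| 104 | // The U-suffixed compare ops are unordered: they also pass when either operand is NaN. | ||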
| 104 | bool IsCompareOpOrdered(FPCompareOp op) { | ||
| 105 | switch (op) { | ||
| 106 | case FPCompareOp::LTU: | ||
| 107 | case FPCompareOp::EQU: | ||
| 108 | case FPCompareOp::LEU: | ||
| 109 | case FPCompareOp::GTU: | ||
| 110 | case FPCompareOp::NEU: | ||
| 111 | case FPCompareOp::GEU: | ||
| 112 | return false; | ||
| 113 | default: | ||
| 114 | return true; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 119 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 120 | IR::FpControl control) { | ||
| 121 | const bool ordered{IsCompareOpOrdered(compare_op)}; | ||
| 122 | switch (compare_op) { | ||
| 123 | case FPCompareOp::F: | ||
| 124 | return ir.Imm1(false); | ||
| 125 | case FPCompareOp::LT: | ||
| 126 | case FPCompareOp::LTU: | ||
| 127 | return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||
| 128 | case FPCompareOp::EQ: | ||
| 129 | case FPCompareOp::EQU: | ||
| 130 | return ir.FPEqual(operand_1, operand_2, control, ordered); | ||
| 131 | case FPCompareOp::LE: | ||
| 132 | case FPCompareOp::LEU: | ||
| 133 | return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||
| 134 | case FPCompareOp::GT: | ||
| 135 | case FPCompareOp::GTU: | ||
| 136 | return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||
| 137 | case FPCompareOp::NE: | ||
| 138 | case FPCompareOp::NEU: | ||
| 139 | return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||
| 140 | case FPCompareOp::GE: | ||
| 141 | case FPCompareOp::GEU: | ||
| 142 | return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||
| 143 | case FPCompareOp::NUM: | ||
| 144 | return ir.FPOrdered(operand_1, operand_2); | ||
| 145 | case FPCompareOp::Nan: | ||
| 146 | return ir.FPUnordered(operand_1, operand_2); | ||
| 147 | case FPCompareOp::T: | ||
| 148 | return ir.Imm1(true); | ||
| 149 | default: | ||
| 150 | throw NotImplementedException("Invalid FP compare op {}", compare_op); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 12 | const IR::U32& operand_2, CompareOp compare_op, bool is_signed); | ||
| 13 | |||
| 14 | [[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 15 | const IR::U32& operand_2, CompareOp compare_op, | ||
| 16 | bool is_signed); | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||
| 19 | const IR::U1& predicate_2, BooleanOp bop); | ||
| 20 | |||
| 21 | [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||
| 22 | |||
| 23 | [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); | ||
| 24 | |||
| 25 | [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 26 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 27 | IR::FpControl control = {}); | ||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void TranslatorVisitor::CSET(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 17 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 18 | BitField<42, 1, u64> neg_bop_pred; | ||
| 19 | BitField<44, 1, u64> bf; | ||
| 20 | BitField<45, 2, BooleanOp> bop; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | } const cset{insn}; | ||
| 23 | |||
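| 24 | // BF selects the "boolean float" result encoding: a passing test writes 1.0f | ||
| 25 | // instead of an all-ones integer mask. | ||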
| 24 | const IR::U32 one_mask{ir.Imm32(-1)}; | ||
| 25 | const IR::U32 fp_one{ir.Imm32(0x3f800000)}; | ||
| 26 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 27 | const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; | ||
| 28 | const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; | ||
| 29 | const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; | ||
| 30 | const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; | ||
| 31 | const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; | ||
| 32 | X(cset.dest_reg, result); | ||
| 33 | if (cset.cc != 0) { | ||
| 34 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 35 | SetZFlag(is_zero); | ||
| 36 | if (cset.bf != 0) { | ||
| 37 | ResetSFlag(); | ||
| 38 | } else { | ||
| 39 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 40 | } | ||
| 41 | ResetOFlag(); | ||
| 42 | ResetCFlag(); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::CSETP(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 50 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 51 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 52 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 53 | BitField<42, 1, u64> neg_bop_pred; | ||
| 54 | BitField<45, 2, BooleanOp> bop; | ||
| 55 | } const csetp{insn}; | ||
| 56 | |||
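| 57 | // dest_pred_a receives the combined test; dest_pred_b receives the same | ||
| 58 | // combination with the flow test complemented. | ||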
| 57 | const BooleanOp bop{csetp.bop}; | ||
| 58 | const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; | ||
| 59 | const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; | ||
| 60 | const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; | ||
| 61 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; | ||
| 62 | ir.SetPred(csetp.dest_pred_a, result_a); | ||
| 63 | ir.SetPred(csetp.dest_pred_b, result_b); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<45, 1, u64> neg_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> neg_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const dadd{insn}; | ||
| 25 | if (dadd.cc != 0) { | ||
| 26 | throw NotImplementedException("DADD CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::F64 src_a{v.D(dadd.src_a_reg)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; | ||
| 32 | |||
| 33 | const IR::FpControl control{ | ||
| 34 | .no_contraction = true, | ||
| 35 | .rounding = CastFpRounding(dadd.fp_rounding), | ||
| 36 | .fmz_mode = IR::FmzMode::None, | ||
| 37 | }; | ||
| 38 | |||
| 39 | v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DADD_reg(u64 insn) { | ||
| 44 | DADD(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DADD_cbuf(u64 insn) { | ||
| 48 | DADD(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DADD_imm(u64 insn) { | ||
| 52 | DADD(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | } const dset{insn}; | ||
| 28 | |||
| 29 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; | ||
| 30 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; | ||
| 31 | |||
| 32 | IR::U1 pred{v.ir.GetPred(dset.pred)}; | ||
| 33 | if (dset.neg_pred != 0) { | ||
| 34 | pred = v.ir.LogicalNot(pred); | ||
| 35 | } | ||
| 36 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; | ||
| 37 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; | ||
| 38 | |||
| 39 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 40 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 41 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 42 | const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; | ||
| 43 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 44 | |||
| 45 | v.X(dset.dest_reg, result); | ||
| 46 | if (dset.cc != 0) { | ||
| 47 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 48 | v.SetZFlag(is_zero); | ||
| 49 | if (dset.bf != 0) { | ||
| 50 | v.ResetSFlag(); | ||
| 51 | } else { | ||
| 52 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 53 | } | ||
| 54 | v.ResetCFlag(); | ||
| 55 | v.ResetOFlag(); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::DSET_reg(u64 insn) { | ||
| 61 | DSET(*this, insn, GetDoubleReg20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::DSET_cbuf(u64 insn) { | ||
| 65 | DSET(*this, insn, GetDoubleCbuf(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::DSET_imm(u64 insn) { | ||
| 69 | DSET(*this, insn, GetDoubleImm20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | BitField<48, 1, u64> neg_b; | ||
| 20 | BitField<49, 1, u64> neg_c; | ||
| 21 | BitField<50, 2, FpRounding> fp_rounding; | ||
| 22 | } const dfma{insn}; | ||
| 23 | |||
| 24 | if (dfma.cc != 0) { | ||
| 25 | throw NotImplementedException("DFMA CC"); | ||
| 26 | } | ||
| 27 | |||
| 28 | const IR::F64 src_a{v.D(dfma.src_a_reg)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; | ||
| 30 | const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; | ||
| 31 | |||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = true, | ||
| 34 | .rounding = CastFpRounding(dfma.fp_rounding), | ||
| 35 | .fmz_mode = IR::FmzMode::None, | ||
| 36 | }; | ||
| 37 | |||
| 38 | v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DFMA_reg(u64 insn) { | ||
| 43 | DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DFMA_cr(u64 insn) { | ||
| 47 | DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DFMA_rc(u64 insn) { | ||
| 51 | DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::DFMA_imm(u64 insn) { | ||
| 55 | DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<45, 1, u64> negate_b; | ||
| 19 | BitField<46, 1, u64> abs_a; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> negate_a; | ||
| 22 | BitField<49, 1, u64> abs_b; | ||
| 23 | } const dmnmx{insn}; | ||
| 24 | |||
| 25 | if (dmnmx.cc != 0) { | ||
| 26 | throw NotImplementedException("DMNMX CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; | ||
| 32 | |||
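| 33 | // DMNMX yields the minimum when the predicate is true and the maximum | ||
| 34 | // otherwise; a negated predicate simply swaps the two selections. | ||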
| 33 | IR::F64 max{v.ir.FPMax(op_a, op_b)}; | ||
| 34 | IR::F64 min{v.ir.FPMin(op_a, op_b)}; | ||
| 35 | |||
| 36 | if (dmnmx.neg_pred != 0) { | ||
| 37 | std::swap(min, max); | ||
| 38 | } | ||
| 39 | v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DMNMX_reg(u64 insn) { | ||
| 44 | DMNMX(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DMNMX_cbuf(u64 insn) { | ||
| 48 | DMNMX(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DMNMX_imm(u64 insn) { | ||
| 52 | DMNMX(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg; | ||
| 21 | } const dmul{insn}; | ||
| 22 | |||
| 23 | if (dmul.cc != 0) { | ||
| 24 | throw NotImplementedException("DMUL CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; | ||
| 28 | const IR::FpControl control{ | ||
| 29 | .no_contraction = true, | ||
| 30 | .rounding = CastFpRounding(dmul.fp_rounding), | ||
| 31 | .fmz_mode = IR::FmzMode::None, | ||
| 32 | }; | ||
| 33 | |||
| 34 | v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); | ||
| 35 | } | ||
| 36 | } // Anonymous namespace | ||
| 37 | |||
| 38 | void TranslatorVisitor::DMUL_reg(u64 insn) { | ||
| 39 | DMUL(*this, insn, GetDoubleReg20(insn)); | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::DMUL_cbuf(u64 insn) { | ||
| 43 | DMUL(*this, insn, GetDoubleCbuf(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DMUL_imm(u64 insn) { | ||
| 47 | DMUL(*this, insn, GetDoubleImm20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 26 | } const dsetp{insn}; | ||
| 27 | |||
| 28 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; | ||
| 30 | |||
| 31 | const BooleanOp bop{dsetp.bop}; | ||
| 32 | const FPCompareOp compare_op{dsetp.compare_op}; | ||
| 33 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; | ||
| 34 | const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; | ||
| 35 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 36 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 37 | v.ir.SetPred(dsetp.dest_pred_a, result_a); | ||
| 38 | v.ir.SetPred(dsetp.dest_pred_b, result_b); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DSETP_reg(u64 insn) { | ||
| 43 | DSETP(*this, insn, GetDoubleReg20(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DSETP_cbuf(u64 insn) { | ||
| 47 | DSETP(*this, insn, GetDoubleCbuf(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DSETP_imm(u64 insn) { | ||
| 51 | DSETP(*this, insn, GetDoubleImm20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ExitFragment(TranslatorVisitor& v) { | ||
| 12 | const ProgramHeader sph{v.env.SPH()}; | ||
| 13 | IR::Reg src_reg{IR::Reg::R0}; | ||
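| 14 | // Color outputs are read from consecutive registers starting at R0, one | ||
| 15 | // register per enabled component of each render target. | ||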
| 14 | for (u32 render_target = 0; render_target < 8; ++render_target) { | ||
| 15 | const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; | ||
| 16 | for (u32 component = 0; component < 4; ++component) { | ||
| 17 | if (!mask[component]) { | ||
| 18 | continue; | ||
| 19 | } | ||
| 20 | v.ir.SetFragColor(render_target, component, v.F(src_reg)); | ||
| 21 | ++src_reg; | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (sph.ps.omap.sample_mask != 0) { | ||
| 25 | v.ir.SetSampleMask(v.X(src_reg)); | ||
| 26 | } | ||
| 27 | if (sph.ps.omap.depth != 0) { | ||
| 28 | v.ir.SetFragDepth(v.F(src_reg + 1)); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::EXIT() { | ||
| 34 | switch (env.ShaderStage()) { | ||
| 35 | case Stage::Fragment: | ||
| 36 | ExitFragment(*this); | ||
| 37 | break; | ||
| 38 | default: | ||
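| 39 | // Other stages have no epilogue to emit. | ||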
| 39 | break; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | BitField<41, 1, u64> shift; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const flo{insn}; | ||
| 20 | |||
| 21 | if (flo.cc != 0) { | ||
| 22 | throw NotImplementedException("CC"); | ||
| 23 | } | ||
| 24 | if (flo.tilde != 0) { | ||
| 25 | src = v.ir.BitwiseNot(src); | ||
| 26 | } | ||
| 27 | IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; | ||
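| 28 | // The shift flag reports the bit position counted from the MSB instead of | ||
| 29 | // the LSB; XOR with 31 converts the index unless no bit was found (-1). | ||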
| 28 | if (flo.shift != 0) { | ||
| 29 | const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; | ||
| 30 | result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; | ||
| 31 | } | ||
| 32 | v.X(flo.dest_reg, result); | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | void TranslatorVisitor::FLO_reg(u64 insn) { | ||
| 37 | FLO(*this, insn, GetReg20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::FLO_cbuf(u64 insn) { | ||
| 41 | FLO(*this, insn, GetCbuf(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::FLO_imm(u64 insn) { | ||
| 45 | FLO(*this, insn, GetImm20(insn)); | ||
| 46 | } | ||
| 47 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||
| 13 | const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const fadd{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FADD CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||
| 25 | IR::FpControl control{ | ||
| 26 | .no_contraction = true, | ||
| 27 | .rounding = CastFpRounding(fp_rounding), | ||
| 28 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 29 | }; | ||
| 30 | IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; | ||
| 31 | if (sat) { | ||
| 32 | value = v.ir.FPSaturate(value); | ||
| 33 | } | ||
| 34 | v.F(fadd.dest_reg, value); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> neg_b; | ||
| 43 | BitField<46, 1, u64> abs_a; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> neg_a; | ||
| 46 | BitField<49, 1, u64> abs_b; | ||
| 47 | BitField<50, 1, u64> sat; | ||
| 48 | } const fadd{insn}; | ||
| 49 | |||
| 50 | FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, | ||
| 51 | fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::FADD_reg(u64 insn) { | ||
| 56 | FADD(*this, insn, GetFloatReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::FADD_cbuf(u64 insn) { | ||
| 60 | FADD(*this, insn, GetFloatCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::FADD_imm(u64 insn) { | ||
| 64 | FADD(*this, insn, GetFloatImm20(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FADD32I(u64 insn) { | ||
| 68 | union { | ||
| 69 | u64 raw; | ||
| 70 | BitField<52, 1, u64> cc; | ||
| 71 | BitField<53, 1, u64> neg_b; | ||
| 72 | BitField<54, 1, u64> abs_a; | ||
| 73 | BitField<55, 1, u64> ftz; | ||
| 74 | BitField<56, 1, u64> neg_a; | ||
| 75 | BitField<57, 1, u64> abs_b; | ||
| 76 | } const fadd32i{insn}; | ||
| 77 | |||
| 78 | FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), | ||
| 79 | fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<47, 1, u64> ftz; | ||
| 18 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 19 | } const fcmp{insn}; | ||
| 20 | |||
| 21 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 22 | const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; | ||
| 23 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; | ||
| 24 | const IR::U32 src_reg{v.X(fcmp.src_reg)}; | ||
| 25 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 26 | |||
| 27 | v.X(fcmp.dest_reg, result); | ||
| 28 | } | ||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 31 | void TranslatorVisitor::FCMP_reg(u64 insn) { | ||
| 32 | FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::FCMP_rc(u64 insn) { | ||
| 36 | FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::FCMP_cr(u64 insn) { | ||
| 40 | FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); | ||
| 41 | } | ||
| 42 | |||
| 43 | void TranslatorVisitor::FCMP_imm(u64 insn) { | ||
| 44 | union { | ||
| 45 | u64 raw; | ||
| 46 | BitField<20, 19, u64> value; | ||
| 47 | BitField<56, 1, u64> is_negative; | ||
| 48 | } const fcmp{insn}; | ||
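| 49 | // The 19-bit immediate supplies the upper bits of an f32: shift it into | ||
| 50 | // place and OR in the sign bit from its own field. | ||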
| 49 | const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; | ||
| 50 | const u32 value{static_cast<u32>(fcmp.value) << 12}; | ||
| 51 | |||
| 52 | FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | BitField<55, 1, u64> ftz; | ||
| 28 | } const fset{insn}; | ||
| 29 | |||
| 30 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; | ||
| 31 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)}; | ||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = false, | ||
| 34 | .rounding = IR::FpRounding::DontCare, | ||
| 35 | .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 36 | }; | ||
| 37 | |||
| 38 | IR::U1 pred{v.ir.GetPred(fset.pred)}; | ||
| 39 | if (fset.neg_pred != 0) { | ||
| 40 | pred = v.ir.LogicalNot(pred); | ||
| 41 | } | ||
| 42 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 48 | const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; | ||
| 49 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 50 | |||
| 51 | v.X(fset.dest_reg, result); | ||
| 52 | if (fset.cc != 0) { | ||
| 53 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 54 | v.SetZFlag(is_zero); | ||
| 55 | if (fset.bf != 0) { | ||
| 56 | v.ResetSFlag(); | ||
| 57 | } else { | ||
| 58 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 59 | } | ||
| 60 | v.ResetCFlag(); | ||
| 61 | v.ResetOFlag(); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::FSET_reg(u64 insn) { | ||
| 67 | FSET(*this, insn, GetFloatReg20(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::FSET_cbuf(u64 insn) { | ||
| 71 | FSET(*this, insn, GetFloatCbuf(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::FSET_imm(u64 insn) { | ||
| 75 | FSET(*this, insn, GetFloatImm20(insn)); | ||
| 76 | } | ||
| 77 | |||
| 78 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | namespace { | ||
| 10 | enum class FloatFormat : u64 { | ||
| 11 | F16 = 1, | ||
| 12 | F32 = 2, | ||
| 13 | F64 = 3, | ||
| 14 | }; | ||
| 15 | |||
| 16 | enum class RoundingOp : u64 { | ||
| 17 | None = 0, | ||
| 18 | Pass = 3, | ||
| 19 | Round = 8, | ||
| 20 | Floor = 9, | ||
| 21 | Ceil = 10, | ||
| 22 | Trunc = 11, | ||
| 23 | }; | ||
| 24 | |||
| 25 | [[nodiscard]] u32 WidthSize(FloatFormat width) { | ||
| 26 | switch (width) { | ||
| 27 | case FloatFormat::F16: | ||
| 28 | return 16; | ||
| 29 | case FloatFormat::F32: | ||
| 30 | return 32; | ||
| 31 | case FloatFormat::F64: | ||
| 32 | return 64; | ||
| 33 | default: | ||
| 34 | throw NotImplementedException("Invalid width {}", width); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { | ||
| 39 | union { | ||
| 40 | u64 insn; | ||
| 41 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 42 | BitField<8, 2, FloatFormat> dst_size; | ||
| 43 | BitField<10, 2, FloatFormat> src_size; | ||
| 44 | BitField<39, 4, u64> rounding_op; | ||
| 45 | BitField<39, 2, FpRounding> rounding; | ||
| 46 | BitField<44, 1, u64> ftz; | ||
| 47 | BitField<45, 1, u64> neg; | ||
| 48 | BitField<47, 1, u64> cc; | ||
| 49 | BitField<50, 1, u64> sat; | ||
| 50 | |||
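| 51 | // 'rounding_op' overlaps the two-bit 'rounding' field; RoundingOperation() | ||
| 52 | // clears bit 2 so aliased encodings map onto the canonical RoundingOp values. | ||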
| 51 | [[nodiscard]] RoundingOp RoundingOperation() const { | ||
| 52 | constexpr u64 rounding_mask = 0x0B; | ||
| 53 | return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); | ||
| 54 | } | ||
| 55 | } const f2f{insn}; | ||
| 56 | |||
| 57 | if (f2f.cc != 0) { | ||
| 58 | throw NotImplementedException("F2F CC"); | ||
| 59 | } | ||
| 60 | |||
| 61 | IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; | ||
| 62 | |||
| 63 | const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; | ||
| 64 | IR::FpControl fp_control{ | ||
| 65 | .no_contraction = false, | ||
| 66 | .rounding = IR::FpRounding::DontCare, | ||
| 67 | .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 68 | }; | ||
| 69 | if (f2f.src_size != f2f.dst_size) { | ||
| 70 | fp_control.rounding = CastFpRounding(f2f.rounding); | ||
| 71 | input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); | ||
| 72 | } else { | ||
| 73 | switch (f2f.RoundingOperation()) { | ||
| 74 | case RoundingOp::None: | ||
| 75 | case RoundingOp::Pass: | ||
| 76 | // A pass-through still has to quiet signaling NaNs (and flush denormals | ||
| 77 | // under FTZ); adding +0.0 in the source width does both | ||
| 77 | switch (f2f.src_size) { | ||
| 78 | case FloatFormat::F16: | ||
| 79 | input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); | ||
| 80 | break; | ||
| 81 | case FloatFormat::F32: | ||
| 82 | input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); | ||
| 83 | break; | ||
| 84 | case FloatFormat::F64: | ||
| 85 | input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | break; | ||
| 89 | case RoundingOp::Round: | ||
| 90 | input = v.ir.FPRoundEven(input, fp_control); | ||
| 91 | break; | ||
| 92 | case RoundingOp::Floor: | ||
| 93 | input = v.ir.FPFloor(input, fp_control); | ||
| 94 | break; | ||
| 95 | case RoundingOp::Ceil: | ||
| 96 | input = v.ir.FPCeil(input, fp_control); | ||
| 97 | break; | ||
| 98 | case RoundingOp::Trunc: | ||
| 99 | input = v.ir.FPTrunc(input, fp_control); | ||
| 100 | break; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | if (f2f.sat != 0 && !any_fp64) { | ||
| 106 | input = v.ir.FPSaturate(input); | ||
| 107 | } | ||
| 108 | |||
| 109 | switch (f2f.dst_size) { | ||
| 110 | case FloatFormat::F16: { | ||
| 111 | const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 112 | v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); | ||
| 113 | break; | ||
| 114 | } | ||
| 115 | case FloatFormat::F32: | ||
| 116 | v.F(f2f.dest_reg, input); | ||
| 117 | break; | ||
| 118 | case FloatFormat::F64: | ||
| 119 | v.D(f2f.dest_reg, input); | ||
| 120 | break; | ||
| 121 | default: | ||
| 122 | throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } // Anonymous namespace | ||
| 126 | |||
| 127 | void TranslatorVisitor::F2F_reg(u64 insn) { | ||
| 128 | union { | ||
| 129 | u64 insn; | ||
| 130 | BitField<49, 1, u64> abs; | ||
| 131 | BitField<10, 2, FloatFormat> src_size; | ||
| 132 | BitField<41, 1, u64> selector; | ||
| 133 | } const f2f{insn}; | ||
| 134 | |||
| 135 | IR::F16F32F64 src_a; | ||
| 136 | switch (f2f.src_size) { | ||
| 137 | case FloatFormat::F16: { | ||
| 138 | auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; | ||
| 139 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 140 | break; | ||
| 141 | } | ||
| 142 | case FloatFormat::F32: | ||
| 143 | src_a = GetFloatReg20(insn); | ||
| 144 | break; | ||
| 145 | case FloatFormat::F64: | ||
| 146 | src_a = GetDoubleReg20(insn); | ||
| 147 | break; | ||
| 148 | default: | ||
| 149 | throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); | ||
| 150 | } | ||
| 151 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::F2F_cbuf(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 insn; | ||
| 157 | BitField<49, 1, u64> abs; | ||
| 158 | BitField<10, 2, FloatFormat> src_size; | ||
| 159 | BitField<41, 1, u64> selector; | ||
| 160 | } const f2f{insn}; | ||
| 161 | |||
| 162 | IR::F16F32F64 src_a; | ||
| 163 | switch (f2f.src_size) { | ||
| 164 | case FloatFormat::F16: { | ||
| 165 | auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; | ||
| 166 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | case FloatFormat::F32: | ||
| 170 | src_a = GetFloatCbuf(insn); | ||
| 171 | break; | ||
| 172 | case FloatFormat::F64: | ||
| 173 | src_a = GetDoubleCbuf(insn); | ||
| 174 | break; | ||
| 175 | default: | ||
| 176 | throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); | ||
| 177 | } | ||
| 178 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 179 | } | ||
| 180 | |||
| 181 | void TranslatorVisitor::F2F_imm(u64 insn) { | ||
| 182 | union { | ||
| 183 | u64 insn; | ||
| 184 | BitField<49, 1, u64> abs; | ||
| 185 | BitField<10, 2, FloatFormat> src_size; | ||
| 186 | BitField<41, 1, u64> selector; | ||
| 187 | BitField<20, 19, u64> imm; | ||
| 188 | BitField<56, 1, u64> imm_neg; | ||
| 189 | } const f2f{insn}; | ||
| 190 | |||
| 191 | IR::F16F32F64 src_a; | ||
| 192 | switch (f2f.src_size) { | ||
| 193 | case FloatFormat::F16: { | ||
| 194 | const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; | ||
| 195 | const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; | ||
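| 196 | // Both packed halves hold the same immediate, so either selector picks | ||
| 197 | // the same value. | ||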
| 196 | src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; | ||
| 197 | if (f2f.imm_neg != 0) { | ||
| 198 | throw NotImplementedException("Neg bit on F16"); | ||
| 199 | } | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | case FloatFormat::F32: | ||
| 203 | src_a = GetFloatImm20(insn); | ||
| 204 | break; | ||
| 205 | case FloatFormat::F64: | ||
| 206 | src_a = GetDoubleImm20(insn); | ||
| 207 | break; | ||
| 208 | default: | ||
| 209 | throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value()); | ||
| 210 | } | ||
| 211 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class DestFormat : u64 { | ||
| 15 | Invalid, | ||
| 16 | I16, | ||
| 17 | I32, | ||
| 18 | I64, | ||
| 19 | }; | ||
| 20 | enum class SrcFormat : u64 { | ||
| 21 | Invalid, | ||
| 22 | F16, | ||
| 23 | F32, | ||
| 24 | F64, | ||
| 25 | }; | ||
| 26 | enum class Rounding : u64 { | ||
| 27 | Round, | ||
| 28 | Floor, | ||
| 29 | Ceil, | ||
| 30 | Trunc, | ||
| 31 | }; | ||
| 32 | |||
| 33 | union F2I { | ||
| 34 | u64 raw; | ||
| 35 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 36 | BitField<8, 2, DestFormat> dest_format; | ||
| 37 | BitField<10, 2, SrcFormat> src_format; | ||
| 38 | BitField<12, 1, u64> is_signed; | ||
| 39 | BitField<39, 2, Rounding> rounding; | ||
| 40 | BitField<41, 1, u64> half; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> abs; | ||
| 43 | BitField<47, 1, u64> cc; | ||
| 44 | BitField<49, 1, u64> neg; | ||
| 45 | }; | ||
| 46 | |||
| 47 | size_t BitSize(DestFormat dest_format) { | ||
| 48 | switch (dest_format) { | ||
| 49 | case DestFormat::I16: | ||
| 50 | return 16; | ||
| 51 | case DestFormat::I32: | ||
| 52 | return 32; | ||
| 53 | case DestFormat::I64: | ||
| 54 | return 64; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid destination format {}", dest_format); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { | ||
| 61 | if (is_signed) { | ||
| 62 | switch (format) { | ||
| 63 | case DestFormat::I16: | ||
| 64 | return {static_cast<f64>(std::numeric_limits<s16>::max()), | ||
| 65 | static_cast<f64>(std::numeric_limits<s16>::min())}; | ||
| 66 | case DestFormat::I32: | ||
| 67 | return {static_cast<f64>(std::numeric_limits<s32>::max()), | ||
| 68 | static_cast<f64>(std::numeric_limits<s32>::min())}; | ||
| 69 | case DestFormat::I64: | ||
| 70 | return {static_cast<f64>(std::numeric_limits<s64>::max()), | ||
| 71 | static_cast<f64>(std::numeric_limits<s64>::min())}; | ||
| 72 | default: | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } else { | ||
| 76 | switch (format) { | ||
| 77 | case DestFormat::I16: | ||
| 78 | return {static_cast<f64>(std::numeric_limits<u16>::max()), | ||
| 79 | static_cast<f64>(std::numeric_limits<u16>::min())}; | ||
| 80 | case DestFormat::I32: | ||
| 81 | return {static_cast<f64>(std::numeric_limits<u32>::max()), | ||
| 82 | static_cast<f64>(std::numeric_limits<u32>::min())}; | ||
| 83 | case DestFormat::I64: | ||
| 84 | return {static_cast<f64>(std::numeric_limits<u64>::max()), | ||
| 85 | static_cast<f64>(std::numeric_limits<u64>::min())}; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | throw NotImplementedException("Invalid destination format {}", format); | ||
| 91 | } | ||
| 92 | |||
| 93 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { | ||
| 94 | union { | ||
| 95 | u64 raw; | ||
| 96 | BitField<20, 14, s64> offset; | ||
| 97 | BitField<34, 5, u64> binding; | ||
| 98 | } const cbuf{insn}; | ||
| 99 | if (cbuf.binding >= 18) { | ||
| 100 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 101 | } | ||
| 102 | if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { | ||
| 103 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); | ||
| 104 | } | ||
| 105 | if (cbuf.offset % 2 != 0) { | ||
| 106 | throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); | ||
| 107 | } | ||
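| | // Fetch only the high 32 bits of the f64 operand (byte offset + 4); the low word is | ||
| | // zeroed when packing below, discarding the low half of the mantissa. | ||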
| 108 | const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; | ||
| 109 | const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; | ||
| 110 | const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; | ||
| 111 | const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; | ||
| 112 | return v.ir.PackDouble2x32(vector); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | ||
| 116 | // F2I is used to convert a floating-point value to an integer | ||
| 117 | const F2I f2i{insn}; | ||
| 118 | |||
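| | // The FTZ bit is only honored for F32 sources with 16/32-bit destinations; for F16/F64 | ||
| | // sources or I64 destinations the flush mode is left as DontCare. | ||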
| 119 | const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||
| 120 | f2i.dest_format != DestFormat::I64}; | ||
| 121 | IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||
| 122 | if (denorm_cares) { | ||
| 123 | fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||
| 124 | } | ||
| 125 | const IR::FpControl fp_control{ | ||
| 126 | .no_contraction = true, | ||
| 127 | .rounding = IR::FpRounding::DontCare, | ||
| 128 | .fmz_mode = fmz_mode, | ||
| 129 | }; | ||
| 130 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | ||
| 131 | const IR::F16F32F64 rounded_value{[&] { | ||
| 132 | switch (f2i.rounding) { | ||
| 133 | case Rounding::Round: | ||
| 134 | return v.ir.FPRoundEven(op_a, fp_control); | ||
| 135 | case Rounding::Floor: | ||
| 136 | return v.ir.FPFloor(op_a, fp_control); | ||
| 137 | case Rounding::Ceil: | ||
| 138 | return v.ir.FPCeil(op_a, fp_control); | ||
| 139 | case Rounding::Trunc: | ||
| 140 | return v.ir.FPTrunc(op_a, fp_control); | ||
| 141 | default: | ||
| 142 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | ||
| 143 | } | ||
| 144 | }()}; | ||
| 145 | const bool is_signed{f2i.is_signed != 0}; | ||
| 146 | const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); | ||
| 147 | |||
| 148 | IR::F16F32F64 intermediate; | ||
| 149 | switch (f2i.src_format) { | ||
| 150 | case SrcFormat::F16: { | ||
| 151 | const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; | ||
| 152 | const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; | ||
| 153 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case SrcFormat::F32: { | ||
| 157 | const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; | ||
| 158 | const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; | ||
| 159 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case SrcFormat::F64: { | ||
| 163 | const IR::F64 max_val{v.ir.Imm64(max_bound)}; | ||
| 164 | const IR::F64 min_val{v.ir.Imm64(min_bound)}; | ||
| 165 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | default: | ||
| 169 | throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value()); | ||
| 170 | } | ||
| 171 | |||
| 172 | const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; | ||
| 173 | IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; | ||
| 174 | |||
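| | // NaN handling: when exactly one of the source and destination is 64-bit, a NaN input | ||
| | // yields 0x8000'0000 (I32) or 0x8000'0000'0000'0000 (I64); otherwise signed conversions | ||
| | // yield 0 for NaN. | ||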
| 175 | bool handled_special_case = false; | ||
| 176 | const bool special_nan_cases = | ||
| 177 | (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); | ||
| 178 | if (special_nan_cases) { | ||
| 179 | if (f2i.dest_format == DestFormat::I32) { | ||
| 180 | handled_special_case = true; | ||
| 181 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; | ||
| 182 | } else if (f2i.dest_format == DestFormat::I64) { | ||
| 183 | handled_special_case = true; | ||
| 184 | result = IR::U64{ | ||
| 185 | v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)}; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | if (!handled_special_case && is_signed) { | ||
| 189 | if (bitsize != 64) { | ||
| 190 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; | ||
| 191 | } else { | ||
| 192 | result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | if (bitsize == 64) { | ||
| 197 | v.L(f2i.dest_reg, result); | ||
| 198 | } else { | ||
| 199 | v.X(f2i.dest_reg, result); | ||
| 200 | } | ||
| 201 | |||
| 202 | if (f2i.cc != 0) { | ||
| 203 | throw NotImplementedException("F2I CC"); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | void TranslatorVisitor::F2I_reg(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | F2I base; | ||
| 212 | BitField<20, 8, IR::Reg> src_reg; | ||
| 213 | } const f2i{insn}; | ||
| 214 | |||
| 215 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 216 | switch (f2i.base.src_format) { | ||
| 217 | case SrcFormat::F16: | ||
| 218 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; | ||
| 219 | case SrcFormat::F32: | ||
| 220 | return F(f2i.src_reg); | ||
| 221 | case SrcFormat::F64: | ||
| 222 | return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); | ||
| 223 | default: | ||
| 224 | throw NotImplementedException("Invalid F2I source format {}", | ||
| 225 | f2i.base.src_format.Value()); | ||
| 226 | } | ||
| 227 | }()}; | ||
| 228 | TranslateF2I(*this, insn, op_a); | ||
| 229 | } | ||
| 230 | |||
| 231 | void TranslatorVisitor::F2I_cbuf(u64 insn) { | ||
| 232 | const F2I f2i{insn}; | ||
| 233 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 234 | switch (f2i.src_format) { | ||
| 235 | case SrcFormat::F16: | ||
| 236 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; | ||
| 237 | case SrcFormat::F32: | ||
| 238 | return GetFloatCbuf(insn); | ||
| 239 | case SrcFormat::F64: { | ||
| 240 | return UnpackCbuf(*this, insn); | ||
| 241 | } | ||
| 242 | default: | ||
| 243 | throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); | ||
| 244 | } | ||
| 245 | }()}; | ||
| 246 | TranslateF2I(*this, insn, op_a); | ||
| 247 | } | ||
| 248 | |||
| 249 | void TranslatorVisitor::F2I_imm(u64) { | ||
| 250 | throw NotImplementedException("{}", Opcode::F2I_imm); | ||
| 251 | } | ||
| 252 | |||
| 253 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, | ||
| 13 | bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const ffma{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FFMA CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 25 | const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||
| 26 | const IR::FpControl fp_control{ | ||
| 27 | .no_contraction = true, | ||
| 28 | .rounding = CastFpRounding(fp_rounding), | ||
| 29 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 30 | }; | ||
| 31 | IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; | ||
| 32 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 33 | // Do not implement FMZ when SAT is enabled, as saturation already performs this logic. | ||
| 34 | // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 35 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 36 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 37 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 38 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 39 | value = IR::F32{v.ir.Select(any_zero, op_c, value)}; | ||
| 40 | } | ||
| 41 | if (sat) { | ||
| 42 | value = v.ir.FPSaturate(value); | ||
| 43 | } | ||
| 44 | v.F(ffma.dest_reg, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<47, 1, u64> cc; | ||
| 51 | BitField<48, 1, u64> neg_b; | ||
| 52 | BitField<49, 1, u64> neg_c; | ||
| 53 | BitField<50, 1, u64> sat; | ||
| 54 | BitField<51, 2, FpRounding> fp_rounding; | ||
| 55 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 56 | } const ffma{insn}; | ||
| 57 | |||
| 58 | FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, | ||
| 59 | ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::FFMA_reg(u64 insn) { | ||
| 64 | FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FFMA_rc(u64 insn) { | ||
| 68 | FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | void TranslatorVisitor::FFMA_cr(u64 insn) { | ||
| 72 | FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void TranslatorVisitor::FFMA_imm(u64 insn) { | ||
| 76 | FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); | ||
| 77 | } | ||
| 78 | |||
| 79 | void TranslatorVisitor::FFMA32I(u64 insn) { | ||
| 80 | union { | ||
| 81 | u64 raw; | ||
| 82 | BitField<0, 8, IR::Reg> src_c; // FFMA32I's addend register aliases the destination register | ||
| 83 | BitField<52, 1, u64> cc; | ||
| 84 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 85 | BitField<55, 1, u64> sat; | ||
| 86 | BitField<56, 1, u64> neg_a; | ||
| 87 | BitField<57, 1, u64> neg_c; | ||
| 88 | } const ffma32i{insn}; | ||
| 89 | |||
| 90 | FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, | ||
| 91 | ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<45, 1, u64> negate_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> negate_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const fmnmx{insn}; | ||
| 25 | |||
| 26 | if (fmnmx.cc != 0) { | ||
| 27 | throw NotImplementedException("FMNMX CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; | ||
| 31 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; | ||
| 32 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; | ||
| 33 | |||
| 34 | const IR::FpControl control{ | ||
| 35 | .no_contraction = false, | ||
| 36 | .rounding = IR::FpRounding::DontCare, | ||
| 37 | .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 38 | }; | ||
| 39 | IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; | ||
| 40 | IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; | ||
| 41 | |||
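| | // FMNMX returns min(a, b) when the predicate is true and max(a, b) otherwise; a negated | ||
| | // predicate bit is handled by swapping the two select operands. | ||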
| 42 | if (fmnmx.neg_pred != 0) { | ||
| 43 | std::swap(min, max); | ||
| 44 | } | ||
| 45 | |||
| 46 | v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | void TranslatorVisitor::FMNMX_reg(u64 insn) { | ||
| 51 | FMNMX(*this, insn, GetFloatReg20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::FMNMX_cbuf(u64 insn) { | ||
| 55 | FMNMX(*this, insn, GetFloatCbuf(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::FMNMX_imm(u64 insn) { | ||
| 59 | FMNMX(*this, insn, GetFloatImm20(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Operation : u64 { | ||
| 14 | Cos = 0, | ||
| 15 | Sin = 1, | ||
| 16 | Ex2 = 2, // Base 2 exponent | ||
| 17 | Lg2 = 3, // Base 2 logarithm | ||
| 18 | Rcp = 4, // Reciprocal | ||
| 19 | Rsq = 5, // Reciprocal square root | ||
| 20 | Rcp64H = 6, // 64-bit reciprocal | ||
| 21 | Rsq64H = 7, // 64-bit reciprocal square root | ||
| 22 | Sqrt = 8, | ||
| 23 | }; | ||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::MUFU(u64 insn) { | ||
| 27 | // MUFU is used to implement the special functions listed in the Operation enum. | ||
| 28 | union { | ||
| 29 | u64 raw; | ||
| 30 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 31 | BitField<8, 8, IR::Reg> src_reg; | ||
| 32 | BitField<20, 4, Operation> operation; | ||
| 33 | BitField<46, 1, u64> abs; | ||
| 34 | BitField<48, 1, u64> neg; | ||
| 35 | BitField<50, 1, u64> sat; | ||
| 36 | } const mufu{insn}; | ||
| 37 | |||
| 38 | const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; | ||
| 39 | IR::F32 value{[&]() -> IR::F32 { | ||
| 40 | switch (mufu.operation) { | ||
| 41 | case Operation::Cos: | ||
| 42 | return ir.FPCos(op_a); | ||
| 43 | case Operation::Sin: | ||
| 44 | return ir.FPSin(op_a); | ||
| 45 | case Operation::Ex2: | ||
| 46 | return ir.FPExp2(op_a); | ||
| 47 | case Operation::Lg2: | ||
| 48 | return ir.FPLog2(op_a); | ||
| 49 | case Operation::Rcp: | ||
| 50 | return ir.FPRecip(op_a); | ||
| 51 | case Operation::Rsq: | ||
| 52 | return ir.FPRecipSqrt(op_a); | ||
| 53 | case Operation::Rcp64H: | ||
| 54 | throw NotImplementedException("MUFU.RCP64H"); | ||
| 55 | case Operation::Rsq64H: | ||
| 56 | throw NotImplementedException("MUFU.RSQ64H"); | ||
| 57 | case Operation::Sqrt: | ||
| 58 | return ir.FPSqrt(op_a); | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); | ||
| 61 | } | ||
| 62 | }()}; | ||
| 63 | |||
| 64 | if (mufu.sat != 0) { | ||
| 65 | value = ir.FPSaturate(value); | ||
| 66 | } | ||
| 67 | |||
| 68 | F(mufu.dest_reg, value); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Scale : u64 { | ||
| 15 | None, | ||
| 16 | D2, | ||
| 17 | D4, | ||
| 18 | D8, | ||
| 19 | M8, | ||
| 20 | M4, | ||
| 21 | M2, | ||
| 22 | INVALIDSCALE37, | ||
| 23 | }; | ||
| 24 | |||
| 25 | float ScaleFactor(Scale scale) { | ||
| 26 | switch (scale) { | ||
| 27 | case Scale::None: | ||
| 28 | return 1.0f; | ||
| 29 | case Scale::D2: | ||
| 30 | return 1.0f / 2.0f; | ||
| 31 | case Scale::D4: | ||
| 32 | return 1.0f / 4.0f; | ||
| 33 | case Scale::D8: | ||
| 34 | return 1.0f / 8.0f; | ||
| 35 | case Scale::M8: | ||
| 36 | return 8.0f; | ||
| 37 | case Scale::M4: | ||
| 38 | return 4.0f; | ||
| 39 | case Scale::M2: | ||
| 40 | return 2.0f; | ||
| 41 | case Scale::INVALIDSCALE37: | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, | ||
| 48 | FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 52 | BitField<8, 8, IR::Reg> src_a; | ||
| 53 | } const fmul{insn}; | ||
| 54 | |||
| 55 | if (cc) { | ||
| 56 | throw NotImplementedException("FMUL CC"); | ||
| 57 | } | ||
| 58 | IR::F32 op_a{v.F(fmul.src_a)}; | ||
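| | // The scale modifier is folded into op_a as a pre-multiply. Scaling by a power of two | ||
| | // is exact barring overflow/underflow, and scale is only accepted together with FTZ and | ||
| | // RN (checked just below), so this matches scaling the final product. | ||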
| 59 | if (scale != Scale::None) { | ||
| 60 | if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { | ||
| 61 | throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||
| 62 | } | ||
| 63 | op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); | ||
| 64 | } | ||
| 65 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 66 | const IR::FpControl fp_control{ | ||
| 67 | .no_contraction = true, | ||
| 68 | .rounding = CastFpRounding(fp_rounding), | ||
| 69 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 70 | }; | ||
| 71 | IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; | ||
| 72 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 73 | // Do not implement FMZ when SAT is enabled, as saturation already performs this logic. | ||
| 74 | // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 75 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 76 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 77 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 78 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 79 | value = IR::F32{v.ir.Select(any_zero, zero, value)}; | ||
| 80 | } | ||
| 81 | if (sat) { | ||
| 82 | value = v.ir.FPSaturate(value); | ||
| 83 | } | ||
| 84 | v.F(fmul.dest_reg, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 88 | union { | ||
| 89 | u64 raw; | ||
| 90 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 91 | BitField<41, 3, Scale> scale; | ||
| 92 | BitField<44, 2, FmzMode> fmz; | ||
| 93 | BitField<47, 1, u64> cc; | ||
| 94 | BitField<48, 1, u64> neg_b; | ||
| 95 | BitField<50, 1, u64> sat; | ||
| 96 | } const fmul{insn}; | ||
| 97 | |||
| 98 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 99 | fmul.neg_b != 0); | ||
| 100 | } | ||
| 101 | } // Anonymous namespace | ||
| 102 | |||
| 103 | void TranslatorVisitor::FMUL_reg(u64 insn) { | ||
| 104 | return FMUL(*this, insn, GetFloatReg20(insn)); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::FMUL_cbuf(u64 insn) { | ||
| 108 | return FMUL(*this, insn, GetFloatCbuf(insn)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::FMUL_imm(u64 insn) { | ||
| 112 | return FMUL(*this, insn, GetFloatImm20(insn)); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::FMUL32I(u64 insn) { | ||
| 116 | union { | ||
| 117 | u64 raw; | ||
| 118 | BitField<52, 1, u64> cc; | ||
| 119 | BitField<53, 2, FmzMode> fmz; | ||
| 120 | BitField<55, 1, u64> sat; | ||
| 121 | } const fmul32i{insn}; | ||
| 122 | |||
| 123 | FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, | ||
| 124 | fmul32i.sat != 0, fmul32i.cc != 0, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | SINCOS, | ||
| 13 | EX2, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { | ||
| 17 | union { | ||
| 18 | u64 raw; | ||
| 19 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 20 | BitField<39, 1, Mode> mode; | ||
| 21 | BitField<45, 1, u64> neg; | ||
| 22 | BitField<49, 1, u64> abs; | ||
| 23 | } const rro{insn}; | ||
| 24 | |||
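| | // RRO nominally performs range reduction on the input to prepare it for MUFU; it is | ||
| | // implemented here as a pass-through move (abs/neg only), on the assumption that the | ||
| | // backend's sin/cos/exp2 accept unreduced arguments. | ||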
| 25 | v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | void TranslatorVisitor::RRO_reg(u64 insn) { | ||
| 30 | RRO(*this, insn, GetFloatReg20(insn)); | ||
| 31 | } | ||
| 32 | |||
| 33 | void TranslatorVisitor::RRO_cbuf(u64 insn) { | ||
| 34 | RRO(*this, insn, GetFloatCbuf(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::RRO_imm(u64) { | ||
| 38 | throw NotImplementedException("RRO (imm)"); | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<47, 1, u64> ftz; | ||
| 26 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 27 | } const fsetp{insn}; | ||
| 28 | |||
| 29 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; | ||
| 30 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)}; | ||
| 31 | const IR::FpControl control{ | ||
| 32 | .no_contraction = false, | ||
| 33 | .rounding = IR::FpRounding::DontCare, | ||
| 34 | .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 35 | }; | ||
| 36 | |||
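| | // FSETP writes two predicates: dest_pred_a receives (a OP b) BOP p and dest_pred_b | ||
| | // receives !(a OP b) BOP p, i.e. the same combine applied to the negated comparison. | ||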
| 37 | const BooleanOp bop{fsetp.bop}; | ||
| 38 | const FPCompareOp compare_op{fsetp.compare_op}; | ||
| 39 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; | ||
| 40 | const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; | ||
| 41 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 42 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 43 | v.ir.SetPred(fsetp.dest_pred_a, result_a); | ||
| 44 | v.ir.SetPred(fsetp.dest_pred_b, result_b); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::FSETP_reg(u64 insn) { | ||
| 49 | FSETP(*this, insn, GetFloatReg20(insn)); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::FSETP_cbuf(u64 insn) { | ||
| 53 | FSETP(*this, insn, GetFloatCbuf(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::FSETP_imm(u64 insn) { | ||
| 57 | FSETP(*this, insn, GetFloatImm20(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::FSWZADD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<28, 8, u64> swizzle; | ||
| 16 | BitField<38, 1, u64> ndv; | ||
| 17 | BitField<39, 2, FpRounding> round; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | } const fswzadd{insn}; | ||
| 21 | |||
| 22 | if (fswzadd.ndv != 0) { | ||
| 23 | throw NotImplementedException("FSWZADD NDV"); | ||
| 24 | } | ||
| 25 | |||
| 26 | const IR::F32 src_a{GetFloatReg8(insn)}; | ||
| 27 | const IR::F32 src_b{GetFloatReg20(insn)}; | ||
| 28 | const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; | ||
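| | // The 8-bit swizzle immediate presumably encodes one 2-bit operation per lane of the | ||
| | // quad; it is forwarded untouched for the backend to decode. | ||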
| 29 | |||
| 30 | const IR::FpControl fp_control{ | ||
| 31 | .no_contraction = false, | ||
| 32 | .rounding = CastFpRounding(fswzadd.round), | ||
| 33 | .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 34 | }; | ||
| 35 | |||
| 36 | const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; | ||
| 37 | F(fswzadd.dest_reg, result); | ||
| 38 | |||
| 39 | if (fswzadd.cc != 0) { | ||
| 40 | throw NotImplementedException("FSWZADD CC"); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 11 | union { | ||
| 12 | u64 raw; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a; | ||
| 15 | } const hadd2{insn}; | ||
| 16 | |||
| 17 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 18 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 19 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 20 | if (promotion) { | ||
| 21 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 22 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 23 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 24 | } | ||
| 25 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 26 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 27 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 31 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 32 | |||
| 33 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 34 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 35 | |||
| 36 | const IR::FpControl fp_control{ | ||
| 37 | .no_contraction = true, | ||
| 38 | .rounding = IR::FpRounding::DontCare, | ||
| 39 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 40 | }; | ||
| 41 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 42 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 43 | if (sat) { | ||
| 44 | lhs = v.ir.FPSaturate(lhs); | ||
| 45 | rhs = v.ir.FPSaturate(rhs); | ||
| 46 | } | ||
| 47 | if (promotion) { | ||
| 48 | lhs = v.ir.FPConvert(16, lhs); | ||
| 49 | rhs = v.ir.FPConvert(16, rhs); | ||
| 50 | } | ||
| 51 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, | ||
| 55 | const IR::U32& src_b) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<49, 2, Merge> merge; | ||
| 59 | BitField<39, 1, u64> ftz; | ||
| 60 | BitField<43, 1, u64> neg_a; | ||
| 61 | BitField<44, 1, u64> abs_a; | ||
| 62 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 63 | } const hadd2{insn}; | ||
| 64 | |||
| 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | ||
| 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<32, 1, u64> sat; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<30, 1, u64> abs_b; | ||
| 76 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 77 | } const hadd2{insn}; | ||
| 78 | |||
| 79 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 80 | GetReg20(insn)); | ||
| 81 | } | ||
| 82 | |||
| 83 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<52, 1, u64> sat; | ||
| 87 | BitField<56, 1, u64> neg_b; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | } const hadd2{insn}; | ||
| 90 | |||
| 91 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 92 | GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<52, 1, u64> sat; | ||
| 99 | BitField<56, 1, u64> neg_high; | ||
| 100 | BitField<30, 9, u64> high; | ||
| 101 | BitField<29, 1, u64> neg_low; | ||
| 102 | BitField<20, 9, u64> low; | ||
| 103 | } const hadd2{insn}; | ||
| 104 | |||
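| | // Each 9-bit immediate supplies the top bits (sign excluded) of an fp16: shifting it | ||
| | // left by 6 places it at bits 14..6 (exponent plus upper mantissa), with the neg bit as | ||
| | // the sign. For example, low = 0xF0 gives 0xF0 << 6 = 0x3C00, i.e. +1.0 in H0. | ||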
| 105 | const u32 imm{ | ||
| 106 | static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 107 | static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 108 | HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
| 112 | union { | ||
| 113 | u64 raw; | ||
| 114 | BitField<55, 1, u64> ftz; | ||
| 115 | BitField<52, 1, u64> sat; | ||
| 116 | BitField<56, 1, u64> neg_a; | ||
| 117 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 118 | BitField<20, 32, u64> imm32; | ||
| 119 | } const hadd2{insn}; | ||
| 120 | |||
| 121 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 122 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 123 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 124 | } | ||
| 125 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | ||
| 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | ||
| 11 | bool sat, HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hfma2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; | ||
| 21 | const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; | ||
| 22 | if (promotion) { | ||
| 23 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 24 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 25 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 26 | } | ||
| 27 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 28 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 29 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 30 | } | ||
| 31 | if (lhs_c.Type() == IR::Type::F16) { | ||
| 32 | lhs_c = v.ir.FPConvert(32, lhs_c); | ||
| 33 | rhs_c = v.ir.FPConvert(32, rhs_c); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); | ||
| 38 | rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); | ||
| 39 | |||
| 40 | lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); | ||
| 41 | rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); | ||
| 42 | |||
| 43 | const IR::FpControl fp_control{ | ||
| 44 | .no_contraction = true, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 47 | }; | ||
| 48 | IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; | ||
| 49 | IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; | ||
| 50 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 51 | // Do not implement FMZ when SAT is enabled, as saturation already performs this logic. | ||
| 52 | // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 53 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 54 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 55 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 56 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 57 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; | ||
| 58 | |||
| 59 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 60 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 61 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 62 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; | ||
| 63 | } | ||
| 64 | if (sat) { | ||
| 65 | lhs = v.ir.FPSaturate(lhs); | ||
| 66 | rhs = v.ir.FPSaturate(rhs); | ||
| 67 | } | ||
| 68 | if (promotion) { | ||
| 69 | lhs = v.ir.FPConvert(16, lhs); | ||
| 70 | rhs = v.ir.FPConvert(16, rhs); | ||
| 71 | } | ||
| 72 | v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, | ||
| 76 | Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, | ||
| 77 | HalfPrecision precision) { | ||
| 78 | union { | ||
| 79 | u64 raw; | ||
| 80 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 81 | BitField<49, 2, Merge> merge; | ||
| 82 | } const hfma2{insn}; | ||
| 83 | |||
| 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | ||
| 85 | sat, precision); | ||
| 86 | } | ||
| 87 | } // Anonymous namespace | ||
| 88 | |||
| 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 93 | BitField<32, 1, u64> saturate; | ||
| 94 | BitField<31, 1, u64> neg_b; | ||
| 95 | BitField<30, 1, u64> neg_c; | ||
| 96 | BitField<35, 2, Swizzle> swizzle_c; | ||
| 97 | BitField<37, 2, HalfPrecision> precision; | ||
| 98 | } const hfma2{insn}; | ||
| 99 | |||
| 100 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, | ||
| 101 | GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::HFMA2_rc(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<51, 1, u64> neg_c; | ||
| 108 | BitField<52, 1, u64> saturate; | ||
| 109 | BitField<53, 2, Swizzle> swizzle_b; | ||
| 110 | BitField<56, 1, u64> neg_b; | ||
| 111 | BitField<57, 2, HalfPrecision> precision; | ||
| 112 | } const hfma2{insn}; | ||
| 113 | |||
| 114 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, | ||
| 115 | GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::HFMA2_cr(u64 insn) { | ||
| 119 | union { | ||
| 120 | u64 raw; | ||
| 121 | BitField<51, 1, u64> neg_c; | ||
| 122 | BitField<52, 1, u64> saturate; | ||
| 123 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 124 | BitField<56, 1, u64> neg_b; | ||
| 125 | BitField<57, 2, HalfPrecision> precision; | ||
| 126 | } const hfma2{insn}; | ||
| 127 | |||
| 128 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, | ||
| 129 | GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::HFMA2_imm(u64 insn) { | ||
| 133 | union { | ||
| 134 | u64 raw; | ||
| 135 | BitField<51, 1, u64> neg_c; | ||
| 136 | BitField<52, 1, u64> saturate; | ||
| 137 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 138 | |||
| 139 | BitField<56, 1, u64> neg_high; | ||
| 140 | BitField<30, 9, u64> high; | ||
| 141 | BitField<29, 1, u64> neg_low; | ||
| 142 | BitField<20, 9, u64> low; | ||
| 143 | BitField<57, 2, HalfPrecision> precision; | ||
| 144 | } const hfma2{insn}; | ||
| 145 | |||
| 146 | const u32 imm{ | ||
| 147 | static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 148 | static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 149 | |||
| 150 | HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), | ||
| 151 | GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::HFMA2_32I(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<0, 8, IR::Reg> src_c; | ||
| 158 | BitField<20, 32, u64> imm32; | ||
| 159 | BitField<52, 1, u64> neg_c; | ||
| 160 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 161 | BitField<55, 2, HalfPrecision> precision; | ||
| 162 | } const hfma2{insn}; | ||
| 163 | |||
| 164 | const u32 imm{static_cast<u32>(hfma2.imm32)}; | ||
| 165 | HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, | ||
| 166 | Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | |||
| 9 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { | ||
| 10 | switch (precision) { | ||
| 11 | case HalfPrecision::None: | ||
| 12 | return IR::FmzMode::None; | ||
| 13 | case HalfPrecision::FTZ: | ||
| 14 | return IR::FmzMode::FTZ; | ||
| 15 | case HalfPrecision::FMZ: | ||
| 16 | return IR::FmzMode::FMZ; | ||
| 17 | default: | ||
| 18 | return IR::FmzMode::DontCare; | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
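| | // Extract splits a 32-bit register into a pair of operands according to the swizzle: | ||
| | // H1_H0 yields both fp16 halves, H0_H0/H1_H1 duplicate one half, and F32 reinterprets | ||
| | // the whole register as a single f32 used for both. | ||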
| 22 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 23 | switch (swizzle) { | ||
| 24 | case Swizzle::H1_H0: { | ||
| 25 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 26 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 27 | } | ||
| 28 | case Swizzle::H0_H0: { | ||
| 29 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 30 | return {scalar, scalar}; | ||
| 31 | } | ||
| 32 | case Swizzle::H1_H1: { | ||
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::F32: { | ||
| 37 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 42 | } | ||
| 43 | |||
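| | // MergeResult packs the per-half results back into a 32-bit register: H1_H0 packs both | ||
| | // halves, F32 widens lhs to a full f32 and bit-casts it, and MRG_H0/MRG_H1 replace only | ||
| | // one half of the current destination value, preserving the other. | ||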
| 44 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 45 | Merge merge) { | ||
| 46 | switch (merge) { | ||
| 47 | case Merge::H1_H0: | ||
| 48 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 49 | case Merge::F32: | ||
| 50 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
| 51 | case Merge::MRG_H0: | ||
| 52 | case Merge::MRG_H1: { | ||
| 53 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 54 | const bool is_h0{merge == Merge::MRG_H0}; | ||
| 55 | const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; | ||
| 56 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | |||
| 15 | enum class Merge : u64 { | ||
| 16 | H1_H0, | ||
| 17 | F32, | ||
| 18 | MRG_H0, | ||
| 19 | MRG_H1, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class Swizzle : u64 { | ||
| 23 | H1_H0, | ||
| 24 | F32, | ||
| 25 | H0_H0, | ||
| 26 | H1_H1, | ||
| 27 | }; | ||
| 28 | |||
| 29 | enum class HalfPrecision : u64 { | ||
| 30 | None = 0, | ||
| 31 | FTZ = 1, | ||
| 32 | FMZ = 2, | ||
| 33 | }; | ||
| 34 | |||
| 35 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); | ||
| 36 | |||
| 37 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); | ||
| 38 | |||
| 39 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 40 | Merge merge); | ||
| 41 | |||
| 42 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | ||
| 11 | HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hmul2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 21 | if (promotion) { | ||
| 22 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 23 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 24 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 25 | } | ||
| 26 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 27 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 28 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 32 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 33 | |||
| 34 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 35 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 36 | |||
| 37 | const IR::FpControl fp_control{ | ||
| 38 | .no_contraction = true, | ||
| 39 | .rounding = IR::FpRounding::DontCare, | ||
| 40 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 41 | }; | ||
| 42 | IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; | ||
| 43 | IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; | ||
| 44 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 45 | // Do not implement FMZ when SAT is enabled, as saturation already performs this logic. | ||
| 46 | // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 47 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 48 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 49 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 50 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 51 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; | ||
| 52 | |||
| 53 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 54 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 55 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 56 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; | ||
| 57 | } | ||
| 58 | if (sat) { | ||
| 59 | lhs = v.ir.FPSaturate(lhs); | ||
| 60 | rhs = v.ir.FPSaturate(rhs); | ||
| 61 | } | ||
| 62 | if (promotion) { | ||
| 63 | lhs = v.ir.FPConvert(16, lhs); | ||
| 64 | rhs = v.ir.FPConvert(16, rhs); | ||
| 65 | } | ||
| 66 | v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); | ||
| 67 | } | ||
| 68 | |||
| 69 | void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, | ||
| 70 | Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<49, 2, Merge> merge; | ||
| 74 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 75 | BitField<39, 2, HalfPrecision> precision; | ||
| 76 | } const hmul2{insn}; | ||
| 77 | |||
| 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | ||
| 79 | hmul2.precision); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<32, 1, u64> sat; | ||
| 87 | BitField<31, 1, u64> neg_b; | ||
| 88 | BitField<30, 1, u64> abs_b; | ||
| 89 | BitField<44, 1, u64> abs_a; | ||
| 90 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 91 | } const hmul2{insn}; | ||
| 92 | |||
| 93 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, | ||
| 94 | hmul2.swizzle_b, GetReg20(insn)); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HMUL2_cbuf(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 raw; | ||
| 100 | BitField<52, 1, u64> sat; | ||
| 101 | BitField<54, 1, u64> abs_b; | ||
| 102 | BitField<43, 1, u64> neg_a; | ||
| 103 | BitField<44, 1, u64> abs_a; | ||
| 104 | } const hmul2{insn}; | ||
| 105 | |||
| 106 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, | ||
| 107 | Swizzle::F32, GetCbuf(insn)); | ||
| 108 | } | ||
| 109 | |||
| 110 | void TranslatorVisitor::HMUL2_imm(u64 insn) { | ||
| 111 | union { | ||
| 112 | u64 raw; | ||
| 113 | BitField<52, 1, u64> sat; | ||
| 114 | BitField<56, 1, u64> neg_high; | ||
| 115 | BitField<30, 9, u64> high; | ||
| 116 | BitField<29, 1, u64> neg_low; | ||
| 117 | BitField<20, 9, u64> low; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | } const hmul2{insn}; | ||
| 121 | |||
| 122 | const u32 imm{ | ||
| 123 | static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 124 | static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 125 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, | ||
| 126 | Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 127 | } | ||
| 128 | |||
| 129 | void TranslatorVisitor::HMUL2_32I(u64 insn) { | ||
| 130 | union { | ||
| 131 | u64 raw; | ||
| 132 | BitField<55, 2, HalfPrecision> precision; | ||
| 133 | BitField<52, 1, u64> sat; | ||
| 134 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 135 | BitField<20, 32, u64> imm32; | ||
| 136 | } const hmul2{insn}; | ||
| 137 | |||
| 138 | const u32 imm{static_cast<u32>(hmul2.imm32)}; | ||
| 139 | HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, | ||
| 140 | Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); | ||
| 141 | } | ||
| 142 | |||
| 143 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, | ||
| 10 | bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 15 | BitField<39, 3, IR::Pred> pred; | ||
| 16 | BitField<42, 1, u64> neg_pred; | ||
| 17 | BitField<43, 1, u64> neg_a; | ||
| 18 | BitField<45, 2, BooleanOp> bop; | ||
| 19 | BitField<44, 1, u64> abs_a; | ||
| 20 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 21 | } const hset2{insn}; | ||
| 22 | |||
| 23 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; | ||
| 24 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 25 | |||
| 26 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 27 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 28 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 29 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 30 | } | ||
| 31 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 32 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 33 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 38 | rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 39 | |||
| 40 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 41 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 42 | |||
| 43 | const IR::FpControl control{ | ||
| 44 | .no_contraction = false, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 47 | }; | ||
| 48 | |||
| 49 | IR::U1 pred{v.ir.GetPred(hset2.pred)}; | ||
| 50 | if (hset2.neg_pred != 0) { | ||
| 51 | pred = v.ir.LogicalNot(pred); | ||
| 52 | } | ||
| 53 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 54 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 55 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; | ||
| 56 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; | ||
| 57 | |||
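| | // BF selects the "boolean float" encoding: true becomes 0x3C00 (fp16 1.0) instead of | ||
| | // the all-ones 0xFFFF mask; the H1 result is the same value shifted into the top half. | ||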
| 58 | const u32 true_value = bf ? 0x3c00 : 0xffff; | ||
| 59 | const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; | ||
| 60 | const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; | ||
| 61 | const IR::U32 fail_result{v.ir.Imm32(0)}; | ||
| 62 | const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; | ||
| 63 | const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; | ||
| 64 | |||
| 65 | v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); | ||
| 66 | } | ||
| 67 | } // Anonymous namespace | ||
| 68 | |||
| 69 | void TranslatorVisitor::HSET2_reg(u64 insn) { | ||
| 70 | union { | ||
| 71 | u64 insn; | ||
| 72 | BitField<30, 1, u64> abs_b; | ||
| 73 | BitField<49, 1, u64> bf; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<50, 1, u64> ftz; | ||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 77 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 78 | } const hset2{insn}; | ||
| 79 | |||
| 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | ||
| 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSET2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> bf; | ||
| 88 | BitField<56, 1, u64> neg_b; | ||
| 89 | BitField<54, 1, u64> ftz; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hset2{insn}; | ||
| 92 | |||
| 93 | HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, | ||
| 94 | hset2.compare_op, Swizzle::F32); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSET2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> bf; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hset2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{ | ||
| 110 | static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 112 | |||
| 113 | HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, | ||
| 114 | Swizzle::H1_H0); | ||
| 115 | } | ||
| 116 | |||
| 117 | } // namespace Shader::Maxwell | ||
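HSET2_imm rebuilds its immediate from two 9-bit fields plus explicit sign bits: each field is shifted so it covers bits 14..6 of its f16 half (the exponent and the top four mantissa bits), with the signs stored separately at bits 15 and 31. A sketch of that packing under those assumptions, with a hypothetical helper name:

    #include <cstdint>

    constexpr uint32_t PackHset2Imm(uint32_t low9, bool neg_low, uint32_t high9,
                                    bool neg_high) {
        return (low9 << 6) | (neg_low ? 1u << 15 : 0u) |
               (high9 << 22) | (neg_high ? 1u << 31 : 0u);
    }

    // 0x0f0 << 6 == 0x3c00, the f16 bit pattern of 1.0; the high half holds
    // the same value with the sign bit set.
    static_assert(PackHset2Imm(0x0f0, false, 0x0f0, true) == 0xbc003c00u);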
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, | ||
| 10 | Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 14 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 1, u64> neg_a; | ||
| 19 | BitField<45, 2, BooleanOp> bop; | ||
| 20 | BitField<44, 1, u64> abs_a; | ||
| 21 | BitField<6, 1, u64> ftz; | ||
| 22 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 23 | } const hsetp2{insn}; | ||
| 24 | |||
| 25 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; | ||
| 26 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 27 | |||
| 28 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 29 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 30 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 31 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 32 | } | ||
| 33 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 34 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 35 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 40 | rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 41 | |||
| 42 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 43 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 44 | |||
| 45 | const IR::FpControl control{ | ||
| 46 | .no_contraction = false, | ||
| 47 | .rounding = IR::FpRounding::DontCare, | ||
| 48 | .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 49 | }; | ||
| 50 | |||
| 51 | IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; | ||
| 52 | if (hsetp2.neg_pred != 0) { | ||
| 53 | pred = v.ir.LogicalNot(pred); | ||
| 54 | } | ||
| 55 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 56 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 57 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; | ||
| 58 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; | ||
| 59 | |||
| 60 | if (h_and) { | ||
| 61 | auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); | ||
| 62 | v.ir.SetPred(hsetp2.dest_pred_a, result); | ||
| 63 | v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); | ||
| 64 | } else { | ||
| 65 | v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); | ||
| 66 | v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | void TranslatorVisitor::HSETP2_reg(u64 insn) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<30, 1, u64> abs_b; | ||
| 75 | BitField<49, 1, u64> h_and; | ||
| 76 | BitField<31, 1, u64> neg_b; | ||
| 77 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 78 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 79 | } const hsetp2{insn}; | ||
| 80 | HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, | ||
| 81 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSETP2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> h_and; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | BitField<56, 1, u64> neg_b; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hsetp2{insn}; | ||
| 92 | |||
| 93 | HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, | ||
| 94 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSETP2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> h_and; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hsetp2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{static_cast<u32>(hsetp2.low << 6) | | ||
| 110 | static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hsetp2.high << 22) | | ||
| 112 | static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 113 | |||
| 114 | HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, | ||
| 115 | hsetp2.h_and != 0); | ||
| 116 | } | ||
| 117 | |||
| 118 | } // namespace Shader::Maxwell | ||
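With h_and set, HSETP2 above does not write one predicate per half: the first destination predicate receives the conjunction of both half results and the second receives its complement. A minimal model of that distribution, with hypothetical names:

    struct PredPair {
        bool a;
        bool b;
    };

    constexpr PredPair DistributeHAnd(bool lhs, bool rhs, bool h_and) {
        if (h_and) {
            const bool both = lhs && rhs;
            return {both, !both}; // second predicate is the complement
        }
        return {lhs, rhs}; // one result per half
    }

    static_assert(DistributeHAnd(true, false, true).a == false);
    static_assert(DistributeHAnd(true, false, true).b == true);
    static_assert(DistributeHAnd(true, false, false).b == false);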
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | [[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, | ||
| 12 | u32 offset) { | ||
| 13 | if (unaligned) { | ||
| 14 | return ir.Imm32(0); | ||
| 15 | } | ||
| 16 | return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); | ||
| 17 | } | ||
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | IR::U32 TranslatorVisitor::X(IR::Reg reg) { | ||
| 21 | return ir.GetReg(reg); | ||
| 22 | } | ||
| 23 | |||
| 24 | IR::U64 TranslatorVisitor::L(IR::Reg reg) { | ||
| 25 | if (!IR::IsAligned(reg, 2)) { | ||
| 26 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 27 | } | ||
| 28 | return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 29 | } | ||
| 30 | |||
| 31 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { | ||
| 32 | return ir.BitCast<IR::F32>(X(reg)); | ||
| 33 | } | ||
| 34 | |||
| 35 | IR::F64 TranslatorVisitor::D(IR::Reg reg) { | ||
| 36 | if (!IR::IsAligned(reg, 2)) { | ||
| 37 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 38 | } | ||
| 39 | return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | ||
| 43 | ir.SetReg(dest_reg, value); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { | ||
| 47 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 48 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 49 | } | ||
| 50 | const IR::Value result{ir.UnpackUint2x32(value)}; | ||
| 51 | for (int i = 0; i < 2; i++) { | ||
| 52 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { | ||
| 57 | X(dest_reg, ir.BitCast<IR::U32>(value)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { | ||
| 61 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 62 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 63 | } | ||
| 64 | const IR::Value result{ir.UnpackDouble2x32(value)}; | ||
| 65 | for (int i = 0; i < 2; i++) { | ||
| 66 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | IR::U32 TranslatorVisitor::GetReg8(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<8, 8, IR::Reg> index; | ||
| 74 | } const reg{insn}; | ||
| 75 | return X(reg.index); | ||
| 76 | } | ||
| 77 | |||
| 78 | IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||
| 79 | union { | ||
| 80 | u64 raw; | ||
| 81 | BitField<20, 8, IR::Reg> index; | ||
| 82 | } const reg{insn}; | ||
| 83 | return X(reg.index); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||
| 87 | union { | ||
| 88 | u64 raw; | ||
| 89 | BitField<39, 8, IR::Reg> index; | ||
| 90 | } const reg{insn}; | ||
| 91 | return X(reg.index); | ||
| 92 | } | ||
| 93 | |||
| 94 | IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { | ||
| 95 | return ir.BitCast<IR::F32>(GetReg8(insn)); | ||
| 96 | } | ||
| 97 | |||
| 98 | IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { | ||
| 99 | return ir.BitCast<IR::F32>(GetReg20(insn)); | ||
| 100 | } | ||
| 101 | |||
| 102 | IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { | ||
| 103 | return ir.BitCast<IR::F32>(GetReg39(insn)); | ||
| 104 | } | ||
| 105 | |||
| 106 | IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { | ||
| 107 | union { | ||
| 108 | u64 raw; | ||
| 109 | BitField<20, 8, IR::Reg> index; | ||
| 110 | } const reg{insn}; | ||
| 111 | return D(reg.index); | ||
| 112 | } | ||
| 113 | |||
| 114 | IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { | ||
| 115 | union { | ||
| 116 | u64 raw; | ||
| 117 | BitField<39, 8, IR::Reg> index; | ||
| 118 | } const reg{insn}; | ||
| 119 | return D(reg.index); | ||
| 120 | } | ||
| 121 | |||
| 122 | static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { | ||
| 123 | union { | ||
| 124 | u64 raw; | ||
| 125 | BitField<20, 14, u64> offset; | ||
| 126 | BitField<34, 5, u64> binding; | ||
| 127 | } const cbuf{insn}; | ||
| 128 | |||
| 129 | if (cbuf.binding >= 18) { | ||
| 130 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 131 | } | ||
| 132 | if (cbuf.offset >= 0x10'000) { | ||
| 133 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); | ||
| 134 | } | ||
| 135 | const IR::Value binding{static_cast<u32>(cbuf.binding)}; | ||
| 136 | const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; | ||
| 137 | return {IR::U32{binding}, IR::U32{byte_offset}}; | ||
| 138 | } | ||
| 139 | |||
| 140 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||
| 141 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 142 | return ir.GetCbuf(binding, byte_offset); | ||
| 143 | } | ||
| 144 | |||
| 145 | IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { | ||
| 146 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 147 | return ir.GetFloatCbuf(binding, byte_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { | ||
| 151 | union { | ||
| 152 | u64 raw; | ||
| 153 | BitField<20, 1, u64> unaligned; | ||
| 154 | } const cbuf{insn}; | ||
| 155 | |||
| 156 | const auto [binding, offset_value]{CbufAddr(insn)}; | ||
| 157 | const bool unaligned{cbuf.unaligned != 0}; | ||
| 158 | const u32 offset{offset_value.U32()}; | ||
| 159 | const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; | ||
| 160 | |||
| 161 | const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; | ||
| 162 | const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; | ||
| 163 | return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); | ||
| 164 | } | ||
| 165 | |||
| 166 | IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<20, 1, u64> unaligned; | ||
| 170 | } const cbuf{insn}; | ||
| 171 | |||
| 172 | if (cbuf.unaligned != 0) { | ||
| 173 | throw NotImplementedException("Unaligned packed constant buffer read"); | ||
| 174 | } | ||
| 175 | const auto [binding, lower_offset]{CbufAddr(insn)}; | ||
| 176 | const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; | ||
| 177 | const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; | ||
| 178 | const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; | ||
| 179 | return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); | ||
| 180 | } | ||
| 181 | |||
| 182 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { | ||
| 183 | union { | ||
| 184 | u64 raw; | ||
| 185 | BitField<20, 19, u64> value; | ||
| 186 | BitField<56, 1, u64> is_negative; | ||
| 187 | } const imm{insn}; | ||
| 188 | |||
| 189 | if (imm.is_negative != 0) { | ||
| 190 | const s64 raw{static_cast<s64>(imm.value)}; | ||
| 191 | return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); | ||
| 192 | } else { | ||
| 193 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { | ||
| 198 | union { | ||
| 199 | u64 raw; | ||
| 200 | BitField<20, 19, u64> value; | ||
| 201 | BitField<56, 1, u64> is_negative; | ||
| 202 | } const imm{insn}; | ||
| 203 | const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; | ||
| 204 | const u32 value{static_cast<u32>(imm.value) << 12}; | ||
| 205 | return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); | ||
| 206 | } | ||
| 207 | |||
| 208 | IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | BitField<20, 19, u64> value; | ||
| 212 | BitField<56, 1, u64> is_negative; | ||
| 213 | } const imm{insn}; | ||
| 214 | const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; | ||
| 215 | const u64 value{imm.value << 44}; | ||
| 216 | return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); | ||
| 217 | } | ||
| 218 | |||
| 219 | IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { | ||
| 220 | const s64 value{GetImm20(insn).U32()}; | ||
| 221 | return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); | ||
| 222 | } | ||
| 223 | |||
| 224 | IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||
| 225 | union { | ||
| 226 | u64 raw; | ||
| 227 | BitField<20, 32, u64> value; | ||
| 228 | } const imm{insn}; | ||
| 229 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 230 | } | ||
| 231 | |||
| 232 | IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<20, 32, u64> value; | ||
| 236 | } const imm{insn}; | ||
| 237 | return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { | ||
| 241 | ir.SetZFlag(value); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::SetSFlag(const IR::U1& value) { | ||
| 245 | ir.SetSFlag(value); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SetCFlag(const IR::U1& value) { | ||
| 249 | ir.SetCFlag(value); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SetOFlag(const IR::U1& value) { | ||
| 253 | ir.SetOFlag(value); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::ResetZero() { | ||
| 257 | SetZFlag(ir.Imm1(false)); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::ResetSFlag() { | ||
| 261 | SetSFlag(ir.Imm1(false)); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::ResetCFlag() { | ||
| 265 | SetCFlag(ir.Imm1(false)); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::ResetOFlag() { | ||
| 269 | SetOFlag(ir.Imm1(false)); | ||
| 270 | } | ||
| 271 | |||
| 272 | } // namespace Shader::Maxwell | ||
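GetImm20 above treats bit 56 as the sign of a 20-bit two's complement immediate whose 19 magnitude bits sit at bits 20-38, so the negative branch evaluates to raw - 2^19. A host-side model with worked values, using a hypothetical helper name:

    #include <cstdint>

    constexpr int32_t DecodeImm20(uint32_t raw19, bool is_negative) {
        // -(1 << 19) + raw19, as in GetImm20 above
        return is_negative ? static_cast<int32_t>(raw19) - (1 << 19)
                           : static_cast<int32_t>(raw19);
    }

    static_assert(DecodeImm20(0x00000, false) == 0);
    static_assert(DecodeImm20(0x7ffff, false) == 524287); // largest positive
    static_assert(DecodeImm20(0x00000, true) == -524288); // smallest negative
    static_assert(DecodeImm20(0x7ffff, true) == -1);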
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class CompareOp : u64 { | ||
| 15 | False, | ||
| 16 | LessThan, | ||
| 17 | Equal, | ||
| 18 | LessThanEqual, | ||
| 19 | GreaterThan, | ||
| 20 | NotEqual, | ||
| 21 | GreaterThanEqual, | ||
| 22 | True, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class BooleanOp : u64 { | ||
| 26 | AND, | ||
| 27 | OR, | ||
| 28 | XOR, | ||
| 29 | }; | ||
| 30 | |||
| 31 | enum class PredicateOp : u64 { | ||
| 32 | False, | ||
| 33 | True, | ||
| 34 | Zero, | ||
| 35 | NonZero, | ||
| 36 | }; | ||
| 37 | |||
| 38 | enum class FPCompareOp : u64 { | ||
| 39 | F, | ||
| 40 | LT, | ||
| 41 | EQ, | ||
| 42 | LE, | ||
| 43 | GT, | ||
| 44 | NE, | ||
| 45 | GE, | ||
| 46 | NUM, | ||
| 47 | Nan, | ||
| 48 | LTU, | ||
| 49 | EQU, | ||
| 50 | LEU, | ||
| 51 | GTU, | ||
| 52 | NEU, | ||
| 53 | GEU, | ||
| 54 | T, | ||
| 55 | }; | ||
| 56 | |||
| 57 | class TranslatorVisitor { | ||
| 58 | public: | ||
| 59 | explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||
| 60 | |||
| 61 | Environment& env; | ||
| 62 | IR::IREmitter ir; | ||
| 63 | |||
| 64 | void AL2P(u64 insn); | ||
| 65 | void ALD(u64 insn); | ||
| 66 | void AST(u64 insn); | ||
| 67 | void ATOM_cas(u64 insn); | ||
| 68 | void ATOM(u64 insn); | ||
| 69 | void ATOMS_cas(u64 insn); | ||
| 70 | void ATOMS(u64 insn); | ||
| 71 | void B2R(u64 insn); | ||
| 72 | void BAR(u64 insn); | ||
| 73 | void BFE_reg(u64 insn); | ||
| 74 | void BFE_cbuf(u64 insn); | ||
| 75 | void BFE_imm(u64 insn); | ||
| 76 | void BFI_reg(u64 insn); | ||
| 77 | void BFI_rc(u64 insn); | ||
| 78 | void BFI_cr(u64 insn); | ||
| 79 | void BFI_imm(u64 insn); | ||
| 80 | void BPT(u64 insn); | ||
| 81 | void BRA(u64 insn); | ||
| 82 | void BRK(u64 insn); | ||
| 83 | void BRX(u64 insn); | ||
| 84 | void CAL(); | ||
| 85 | void CCTL(u64 insn); | ||
| 86 | void CCTLL(u64 insn); | ||
| 87 | void CONT(u64 insn); | ||
| 88 | void CS2R(u64 insn); | ||
| 89 | void CSET(u64 insn); | ||
| 90 | void CSETP(u64 insn); | ||
| 91 | void DADD_reg(u64 insn); | ||
| 92 | void DADD_cbuf(u64 insn); | ||
| 93 | void DADD_imm(u64 insn); | ||
| 94 | void DEPBAR(); | ||
| 95 | void DFMA_reg(u64 insn); | ||
| 96 | void DFMA_rc(u64 insn); | ||
| 97 | void DFMA_cr(u64 insn); | ||
| 98 | void DFMA_imm(u64 insn); | ||
| 99 | void DMNMX_reg(u64 insn); | ||
| 100 | void DMNMX_cbuf(u64 insn); | ||
| 101 | void DMNMX_imm(u64 insn); | ||
| 102 | void DMUL_reg(u64 insn); | ||
| 103 | void DMUL_cbuf(u64 insn); | ||
| 104 | void DMUL_imm(u64 insn); | ||
| 105 | void DSET_reg(u64 insn); | ||
| 106 | void DSET_cbuf(u64 insn); | ||
| 107 | void DSET_imm(u64 insn); | ||
| 108 | void DSETP_reg(u64 insn); | ||
| 109 | void DSETP_cbuf(u64 insn); | ||
| 110 | void DSETP_imm(u64 insn); | ||
| 111 | void EXIT(); | ||
| 112 | void F2F_reg(u64 insn); | ||
| 113 | void F2F_cbuf(u64 insn); | ||
| 114 | void F2F_imm(u64 insn); | ||
| 115 | void F2I_reg(u64 insn); | ||
| 116 | void F2I_cbuf(u64 insn); | ||
| 117 | void F2I_imm(u64 insn); | ||
| 118 | void FADD_reg(u64 insn); | ||
| 119 | void FADD_cbuf(u64 insn); | ||
| 120 | void FADD_imm(u64 insn); | ||
| 121 | void FADD32I(u64 insn); | ||
| 122 | void FCHK_reg(u64 insn); | ||
| 123 | void FCHK_cbuf(u64 insn); | ||
| 124 | void FCHK_imm(u64 insn); | ||
| 125 | void FCMP_reg(u64 insn); | ||
| 126 | void FCMP_rc(u64 insn); | ||
| 127 | void FCMP_cr(u64 insn); | ||
| 128 | void FCMP_imm(u64 insn); | ||
| 129 | void FFMA_reg(u64 insn); | ||
| 130 | void FFMA_rc(u64 insn); | ||
| 131 | void FFMA_cr(u64 insn); | ||
| 132 | void FFMA_imm(u64 insn); | ||
| 133 | void FFMA32I(u64 insn); | ||
| 134 | void FLO_reg(u64 insn); | ||
| 135 | void FLO_cbuf(u64 insn); | ||
| 136 | void FLO_imm(u64 insn); | ||
| 137 | void FMNMX_reg(u64 insn); | ||
| 138 | void FMNMX_cbuf(u64 insn); | ||
| 139 | void FMNMX_imm(u64 insn); | ||
| 140 | void FMUL_reg(u64 insn); | ||
| 141 | void FMUL_cbuf(u64 insn); | ||
| 142 | void FMUL_imm(u64 insn); | ||
| 143 | void FMUL32I(u64 insn); | ||
| 144 | void FSET_reg(u64 insn); | ||
| 145 | void FSET_cbuf(u64 insn); | ||
| 146 | void FSET_imm(u64 insn); | ||
| 147 | void FSETP_reg(u64 insn); | ||
| 148 | void FSETP_cbuf(u64 insn); | ||
| 149 | void FSETP_imm(u64 insn); | ||
| 150 | void FSWZADD(u64 insn); | ||
| 151 | void GETCRSPTR(u64 insn); | ||
| 152 | void GETLMEMBASE(u64 insn); | ||
| 153 | void HADD2_reg(u64 insn); | ||
| 154 | void HADD2_cbuf(u64 insn); | ||
| 155 | void HADD2_imm(u64 insn); | ||
| 156 | void HADD2_32I(u64 insn); | ||
| 157 | void HFMA2_reg(u64 insn); | ||
| 158 | void HFMA2_rc(u64 insn); | ||
| 159 | void HFMA2_cr(u64 insn); | ||
| 160 | void HFMA2_imm(u64 insn); | ||
| 161 | void HFMA2_32I(u64 insn); | ||
| 162 | void HMUL2_reg(u64 insn); | ||
| 163 | void HMUL2_cbuf(u64 insn); | ||
| 164 | void HMUL2_imm(u64 insn); | ||
| 165 | void HMUL2_32I(u64 insn); | ||
| 166 | void HSET2_reg(u64 insn); | ||
| 167 | void HSET2_cbuf(u64 insn); | ||
| 168 | void HSET2_imm(u64 insn); | ||
| 169 | void HSETP2_reg(u64 insn); | ||
| 170 | void HSETP2_cbuf(u64 insn); | ||
| 171 | void HSETP2_imm(u64 insn); | ||
| 172 | void I2F_reg(u64 insn); | ||
| 173 | void I2F_cbuf(u64 insn); | ||
| 174 | void I2F_imm(u64 insn); | ||
| 175 | void I2I_reg(u64 insn); | ||
| 176 | void I2I_cbuf(u64 insn); | ||
| 177 | void I2I_imm(u64 insn); | ||
| 178 | void IADD_reg(u64 insn); | ||
| 179 | void IADD_cbuf(u64 insn); | ||
| 180 | void IADD_imm(u64 insn); | ||
| 181 | void IADD3_reg(u64 insn); | ||
| 182 | void IADD3_cbuf(u64 insn); | ||
| 183 | void IADD3_imm(u64 insn); | ||
| 184 | void IADD32I(u64 insn); | ||
| 185 | void ICMP_reg(u64 insn); | ||
| 186 | void ICMP_rc(u64 insn); | ||
| 187 | void ICMP_cr(u64 insn); | ||
| 188 | void ICMP_imm(u64 insn); | ||
| 189 | void IDE(u64 insn); | ||
| 190 | void IDP_reg(u64 insn); | ||
| 191 | void IDP_imm(u64 insn); | ||
| 192 | void IMAD_reg(u64 insn); | ||
| 193 | void IMAD_rc(u64 insn); | ||
| 194 | void IMAD_cr(u64 insn); | ||
| 195 | void IMAD_imm(u64 insn); | ||
| 196 | void IMAD32I(u64 insn); | ||
| 197 | void IMADSP_reg(u64 insn); | ||
| 198 | void IMADSP_rc(u64 insn); | ||
| 199 | void IMADSP_cr(u64 insn); | ||
| 200 | void IMADSP_imm(u64 insn); | ||
| 201 | void IMNMX_reg(u64 insn); | ||
| 202 | void IMNMX_cbuf(u64 insn); | ||
| 203 | void IMNMX_imm(u64 insn); | ||
| 204 | void IMUL_reg(u64 insn); | ||
| 205 | void IMUL_cbuf(u64 insn); | ||
| 206 | void IMUL_imm(u64 insn); | ||
| 207 | void IMUL32I(u64 insn); | ||
| 208 | void IPA(u64 insn); | ||
| 209 | void ISBERD(u64 insn); | ||
| 210 | void ISCADD_reg(u64 insn); | ||
| 211 | void ISCADD_cbuf(u64 insn); | ||
| 212 | void ISCADD_imm(u64 insn); | ||
| 213 | void ISCADD32I(u64 insn); | ||
| 214 | void ISET_reg(u64 insn); | ||
| 215 | void ISET_cbuf(u64 insn); | ||
| 216 | void ISET_imm(u64 insn); | ||
| 217 | void ISETP_reg(u64 insn); | ||
| 218 | void ISETP_cbuf(u64 insn); | ||
| 219 | void ISETP_imm(u64 insn); | ||
| 220 | void JCAL(u64 insn); | ||
| 221 | void JMP(u64 insn); | ||
| 222 | void JMX(u64 insn); | ||
| 223 | void KIL(); | ||
| 224 | void LD(u64 insn); | ||
| 225 | void LDC(u64 insn); | ||
| 226 | void LDG(u64 insn); | ||
| 227 | void LDL(u64 insn); | ||
| 228 | void LDS(u64 insn); | ||
| 229 | void LEA_hi_reg(u64 insn); | ||
| 230 | void LEA_hi_cbuf(u64 insn); | ||
| 231 | void LEA_lo_reg(u64 insn); | ||
| 232 | void LEA_lo_cbuf(u64 insn); | ||
| 233 | void LEA_lo_imm(u64 insn); | ||
| 234 | void LEPC(u64 insn); | ||
| 235 | void LONGJMP(u64 insn); | ||
| 236 | void LOP_reg(u64 insn); | ||
| 237 | void LOP_cbuf(u64 insn); | ||
| 238 | void LOP_imm(u64 insn); | ||
| 239 | void LOP3_reg(u64 insn); | ||
| 240 | void LOP3_cbuf(u64 insn); | ||
| 241 | void LOP3_imm(u64 insn); | ||
| 242 | void LOP32I(u64 insn); | ||
| 243 | void MEMBAR(u64 insn); | ||
| 244 | void MOV_reg(u64 insn); | ||
| 245 | void MOV_cbuf(u64 insn); | ||
| 246 | void MOV_imm(u64 insn); | ||
| 247 | void MOV32I(u64 insn); | ||
| 248 | void MUFU(u64 insn); | ||
| 249 | void NOP(u64 insn); | ||
| 250 | void OUT_reg(u64 insn); | ||
| 251 | void OUT_cbuf(u64 insn); | ||
| 252 | void OUT_imm(u64 insn); | ||
| 253 | void P2R_reg(u64 insn); | ||
| 254 | void P2R_cbuf(u64 insn); | ||
| 255 | void P2R_imm(u64 insn); | ||
| 256 | void PBK(); | ||
| 257 | void PCNT(); | ||
| 258 | void PEXIT(u64 insn); | ||
| 259 | void PIXLD(u64 insn); | ||
| 260 | void PLONGJMP(u64 insn); | ||
| 261 | void POPC_reg(u64 insn); | ||
| 262 | void POPC_cbuf(u64 insn); | ||
| 263 | void POPC_imm(u64 insn); | ||
| 264 | void PRET(u64 insn); | ||
| 265 | void PRMT_reg(u64 insn); | ||
| 266 | void PRMT_rc(u64 insn); | ||
| 267 | void PRMT_cr(u64 insn); | ||
| 268 | void PRMT_imm(u64 insn); | ||
| 269 | void PSET(u64 insn); | ||
| 270 | void PSETP(u64 insn); | ||
| 271 | void R2B(u64 insn); | ||
| 272 | void R2P_reg(u64 insn); | ||
| 273 | void R2P_cbuf(u64 insn); | ||
| 274 | void R2P_imm(u64 insn); | ||
| 275 | void RAM(u64 insn); | ||
| 276 | void RED(u64 insn); | ||
| 277 | void RET(u64 insn); | ||
| 278 | void RRO_reg(u64 insn); | ||
| 279 | void RRO_cbuf(u64 insn); | ||
| 280 | void RRO_imm(u64 insn); | ||
| 281 | void RTT(u64 insn); | ||
| 282 | void S2R(u64 insn); | ||
| 283 | void SAM(u64 insn); | ||
| 284 | void SEL_reg(u64 insn); | ||
| 285 | void SEL_cbuf(u64 insn); | ||
| 286 | void SEL_imm(u64 insn); | ||
| 287 | void SETCRSPTR(u64 insn); | ||
| 288 | void SETLMEMBASE(u64 insn); | ||
| 289 | void SHF_l_reg(u64 insn); | ||
| 290 | void SHF_l_imm(u64 insn); | ||
| 291 | void SHF_r_reg(u64 insn); | ||
| 292 | void SHF_r_imm(u64 insn); | ||
| 293 | void SHFL(u64 insn); | ||
| 294 | void SHL_reg(u64 insn); | ||
| 295 | void SHL_cbuf(u64 insn); | ||
| 296 | void SHL_imm(u64 insn); | ||
| 297 | void SHR_reg(u64 insn); | ||
| 298 | void SHR_cbuf(u64 insn); | ||
| 299 | void SHR_imm(u64 insn); | ||
| 300 | void SSY(); | ||
| 301 | void ST(u64 insn); | ||
| 302 | void STG(u64 insn); | ||
| 303 | void STL(u64 insn); | ||
| 304 | void STP(u64 insn); | ||
| 305 | void STS(u64 insn); | ||
| 306 | void SUATOM(u64 insn); | ||
| 307 | void SUATOM_cas(u64 insn); | ||
| 308 | void SULD(u64 insn); | ||
| 309 | void SURED(u64 insn); | ||
| 310 | void SUST(u64 insn); | ||
| 311 | void SYNC(u64 insn); | ||
| 312 | void TEX(u64 insn); | ||
| 313 | void TEX_b(u64 insn); | ||
| 314 | void TEXS(u64 insn); | ||
| 315 | void TLD(u64 insn); | ||
| 316 | void TLD_b(u64 insn); | ||
| 317 | void TLD4(u64 insn); | ||
| 318 | void TLD4_b(u64 insn); | ||
| 319 | void TLD4S(u64 insn); | ||
| 320 | void TLDS(u64 insn); | ||
| 321 | void TMML(u64 insn); | ||
| 322 | void TMML_b(u64 insn); | ||
| 323 | void TXA(u64 insn); | ||
| 324 | void TXD(u64 insn); | ||
| 325 | void TXD_b(u64 insn); | ||
| 326 | void TXQ(u64 insn); | ||
| 327 | void TXQ_b(u64 insn); | ||
| 328 | void VABSDIFF(u64 insn); | ||
| 329 | void VABSDIFF4(u64 insn); | ||
| 330 | void VADD(u64 insn); | ||
| 331 | void VMAD(u64 insn); | ||
| 332 | void VMNMX(u64 insn); | ||
| 333 | void VOTE(u64 insn); | ||
| 334 | void VOTE_vtg(u64 insn); | ||
| 335 | void VSET(u64 insn); | ||
| 336 | void VSETP(u64 insn); | ||
| 337 | void VSHL(u64 insn); | ||
| 338 | void VSHR(u64 insn); | ||
| 339 | void XMAD_reg(u64 insn); | ||
| 340 | void XMAD_rc(u64 insn); | ||
| 341 | void XMAD_cr(u64 insn); | ||
| 342 | void XMAD_imm(u64 insn); | ||
| 343 | |||
| 344 | [[nodiscard]] IR::U32 X(IR::Reg reg); | ||
| 345 | [[nodiscard]] IR::U64 L(IR::Reg reg); | ||
| 346 | [[nodiscard]] IR::F32 F(IR::Reg reg); | ||
| 347 | [[nodiscard]] IR::F64 D(IR::Reg reg); | ||
| 348 | |||
| 349 | void X(IR::Reg dest_reg, const IR::U32& value); | ||
| 350 | void L(IR::Reg dest_reg, const IR::U64& value); | ||
| 351 | void F(IR::Reg dest_reg, const IR::F32& value); | ||
| 352 | void D(IR::Reg dest_reg, const IR::F64& value); | ||
| 353 | |||
| 354 | [[nodiscard]] IR::U32 GetReg8(u64 insn); | ||
| 355 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 356 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 357 | [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); | ||
| 358 | [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); | ||
| 359 | [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); | ||
| 360 | [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); | ||
| 361 | [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); | ||
| 362 | |||
| 363 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | ||
| 364 | [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); | ||
| 365 | [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); | ||
| 366 | [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); | ||
| 367 | |||
| 368 | [[nodiscard]] IR::U32 GetImm20(u64 insn); | ||
| 369 | [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); | ||
| 370 | [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); | ||
| 371 | [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); | ||
| 372 | |||
| 373 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 374 | [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); | ||
| 375 | |||
| 376 | void SetZFlag(const IR::U1& value); | ||
| 377 | void SetSFlag(const IR::U1& value); | ||
| 378 | void SetCFlag(const IR::U1& value); | ||
| 379 | void SetOFlag(const IR::U1& value); | ||
| 380 | |||
| 381 | void ResetZero(); | ||
| 382 | void ResetSFlag(); | ||
| 383 | void ResetCFlag(); | ||
| 384 | void ResetOFlag(); | ||
| 385 | }; | ||
| 386 | |||
| 387 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||
| 12 | bool cc) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a; | ||
| 17 | } const iadd{insn}; | ||
| 18 | |||
| 19 | if (sat) { | ||
| 20 | throw NotImplementedException("IADD SAT"); | ||
| 21 | } | ||
| 22 | if (x && po) { | ||
| 23 | throw NotImplementedException("IADD X+PO"); | ||
| 24 | } | ||
| 25 | // Operand A is always read from the register encoded in the instruction, negated if needed | ||
| 26 | IR::U32 op_a{v.X(iadd.src_a)}; | ||
| 27 | if (neg_a) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | // Add both operands | ||
| 31 | IR::U32 result{v.ir.IAdd(op_a, op_b)}; | ||
| 32 | if (x) { | ||
| 33 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 34 | result = v.ir.IAdd(result, carry); | ||
| 35 | } | ||
| 36 | if (po) { | ||
| 37 | // .PO adds one to the result | ||
| 38 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 39 | } | ||
| 40 | if (cc) { | ||
| 41 | // Store flags | ||
| 42 | // TODO: Does this grab the result pre-PO or after? | ||
| 43 | if (po) { | ||
| 44 | throw NotImplementedException("IADD CC+PO"); | ||
| 45 | } | ||
| 46 | // TODO: How does CC behave when X is set? | ||
| 47 | if (x) { | ||
| 48 | throw NotImplementedException("IADD X+CC"); | ||
| 49 | } | ||
| 50 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 51 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 52 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 53 | v.SetOFlag(v.ir.GetOverflowFromOp(result)); | ||
| 54 | } | ||
| 55 | // Store result | ||
| 56 | v.X(iadd.dest_reg, result); | ||
| 57 | } | ||
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; // if == 3, the instruction is PO (adds one) | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | if (!po && iadd.neg_b != 0) { | ||
| 72 | op_b = v.ir.INeg(op_b); | ||
| 73 | } | ||
| 74 | IADD(v, insn, op_b, !po && iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 75 | } | ||
| 76 | } // Anonymous namespace | ||
| 77 | |||
| 78 | void TranslatorVisitor::IADD_reg(u64 insn) { | ||
| 79 | IADD(*this, insn, GetReg20(insn)); | ||
| 80 | } | ||
| 81 | |||
| 82 | void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||
| 83 | IADD(*this, insn, GetCbuf(insn)); | ||
| 84 | } | ||
| 85 | |||
| 86 | void TranslatorVisitor::IADD_imm(u64 insn) { | ||
| 87 | IADD(*this, insn, GetImm20(insn)); | ||
| 88 | } | ||
| 89 | |||
| 90 | void TranslatorVisitor::IADD32I(u64 insn) { | ||
| 91 | union { | ||
| 92 | u64 raw; | ||
| 93 | BitField<52, 1, u64> cc; | ||
| 94 | BitField<53, 1, u64> x; | ||
| 95 | BitField<54, 1, u64> sat; | ||
| 96 | BitField<55, 2, u64> three_for_po; // if == 3, the instruction is PO (adds one) | ||
| 97 | BitField<56, 1, u64> neg_a; | ||
| 98 | } const iadd32i{insn}; | ||
| 99 | |||
| 100 | const bool po{iadd32i.three_for_po == 3}; | ||
| 101 | const bool neg_a{!po && iadd32i.neg_a != 0}; | ||
| 102 | IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Maxwell | ||
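The X and CC paths in IADD exist to chain 32-bit additions into wider ones: a low add with CC records the carry-out, and the following high add with X folds it back in. A scalar sketch of that two-instruction 64-bit add idiom, with hypothetical names:

    #include <cstdint>

    struct AddResult {
        uint32_t value;
        bool carry;
    };

    constexpr AddResult AddWithCarry(uint32_t a, uint32_t b, bool carry_in) {
        const uint64_t wide = static_cast<uint64_t>(a) + b + (carry_in ? 1 : 0);
        return {static_cast<uint32_t>(wide), wide > 0xffffffffull};
    }

    // 0x0000000100000001 + 0x00000000ffffffff == 0x0000000200000000
    constexpr AddResult lo = AddWithCarry(0x00000001u, 0xffffffffu, false); // IADD.CC
    constexpr AddResult hi = AddWithCarry(0x00000001u, 0x00000000u, lo.carry); // IADD.X
    static_assert(lo.value == 0 && lo.carry);
    static_assert(hi.value == 2);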
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Shift : u64 { | ||
| 12 | None, | ||
| 13 | Right, | ||
| 14 | Left, | ||
| 15 | }; | ||
| 16 | enum class Half : u64 { | ||
| 17 | All, | ||
| 18 | Lower, | ||
| 19 | Upper, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { | ||
| 23 | constexpr bool is_signed{false}; | ||
| 24 | switch (half) { | ||
| 25 | case Half::All: | ||
| 26 | return value; | ||
| 27 | case Half::Lower: | ||
| 28 | return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); | ||
| 29 | case Half::Upper: | ||
| 30 | return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); | ||
| 31 | } | ||
| 32 | throw NotImplementedException("Invalid half"); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { | ||
| 36 | switch (shift) { | ||
| 37 | case Shift::None: | ||
| 38 | return value; | ||
| 39 | case Shift::Right: { | ||
| 40 | // 33-bit RS IADD3 edge case | ||
| 41 | const IR::U1 edge_case{ir.GetCarryFromOp(value)}; | ||
| 42 | const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; | ||
| 43 | return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; | ||
| 44 | } | ||
| 45 | case Shift::Left: | ||
| 46 | return ir.ShiftLeftLogical(value, ir.Imm32(16)); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid shift"); | ||
| 49 | } | ||
| 50 | |||
| 51 | void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, | ||
| 52 | Shift shift = Shift::None) { | ||
| 53 | union { | ||
| 54 | u64 insn; | ||
| 55 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 56 | BitField<47, 1, u64> cc; | ||
| 57 | BitField<48, 1, u64> x; | ||
| 58 | BitField<49, 1, u64> neg_c; | ||
| 59 | BitField<50, 1, u64> neg_b; | ||
| 60 | BitField<51, 1, u64> neg_a; | ||
| 61 | } const iadd3{insn}; | ||
| 62 | |||
| 63 | if (iadd3.neg_a != 0) { | ||
| 64 | op_a = v.ir.INeg(op_a); | ||
| 65 | } | ||
| 66 | if (iadd3.neg_b != 0) { | ||
| 67 | op_b = v.ir.INeg(op_b); | ||
| 68 | } | ||
| 69 | if (iadd3.neg_c != 0) { | ||
| 70 | op_c = v.ir.INeg(op_c); | ||
| 71 | } | ||
| 72 | IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; | ||
| 73 | if (iadd3.x != 0) { | ||
| 74 | // TODO: How does RS behave when X is set? | ||
| 75 | if (shift == Shift::Right) { | ||
| 76 | throw NotImplementedException("IADD3 X+RS"); | ||
| 77 | } | ||
| 78 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 79 | lhs_1 = v.ir.IAdd(lhs_1, carry); | ||
| 80 | } | ||
| 81 | const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; | ||
| 82 | const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; | ||
| 83 | |||
| 84 | v.X(iadd3.dest_reg, result); | ||
| 85 | if (iadd3.cc != 0) { | ||
| 86 | // TODO: How does CC behave when X is set? | ||
| 87 | if (iadd3.x != 0) { | ||
| 88 | throw NotImplementedException("IADD3 X+CC"); | ||
| 89 | } | ||
| 90 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 91 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 92 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 93 | const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; | ||
| 94 | v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | } // Anonymous namespace | ||
| 98 | |||
| 99 | void TranslatorVisitor::IADD3_reg(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 insn; | ||
| 102 | BitField<37, 2, Shift> shift; | ||
| 103 | BitField<35, 2, Half> half_a; | ||
| 104 | BitField<33, 2, Half> half_b; | ||
| 105 | BitField<31, 2, Half> half_c; | ||
| 106 | } const iadd3{insn}; | ||
| 107 | |||
| 108 | const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; | ||
| 109 | const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; | ||
| 110 | const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; | ||
| 111 | IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); | ||
| 112 | } | ||
| 113 | |||
| 114 | void TranslatorVisitor::IADD3_cbuf(u64 insn) { | ||
| 115 | IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::IADD3_imm(u64 insn) { | ||
| 119 | IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | } // namespace Shader::Maxwell | ||
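The Shift::Right case above compensates for the intermediate a+b sum being conceptually 33 bits wide: when the 32-bit add carries out, the lost bit has to reappear at bit 16 after the shift, which is what the conditional +0x10000 does. A scalar model showing the equivalence, with a hypothetical helper name:

    #include <cstdint>

    constexpr uint32_t Iadd3RightShift(uint32_t a, uint32_t b) {
        const uint64_t wide_sum = static_cast<uint64_t>(a) + b; // up to 33 bits
        return static_cast<uint32_t>(wide_sum >> 16);
    }

    // Carry-out case: 0xffffffff + 2 == 0x100000001; the truncated 32-bit sum
    // is 1, so (1 >> 16) + 0x10000 matches the wide result.
    static_assert(Iadd3RightShift(0xffffffffu, 2) == 0x00010000);
    // No carry-out: plain logical shift.
    static_assert(Iadd3RightShift(0x00020000u, 0) == 2);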
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<48, 1, u64> is_signed; | ||
| 18 | BitField<49, 3, CompareOp> compare_op; | ||
| 19 | } const icmp{insn}; | ||
| 20 | |||
| 21 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 22 | const bool is_signed{icmp.is_signed != 0}; | ||
| 23 | const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; | ||
| 24 | |||
| 25 | const IR::U32 src_reg{v.X(icmp.src_reg)}; | ||
| 26 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 27 | |||
| 28 | v.X(icmp.dest_reg, result); | ||
| 29 | } | ||
| 30 | } // Anonymous namespace | ||
| 31 | |||
| 32 | void TranslatorVisitor::ICMP_reg(u64 insn) { | ||
| 33 | ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::ICMP_rc(u64 insn) { | ||
| 37 | ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::ICMP_cr(u64 insn) { | ||
| 41 | ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::ICMP_imm(u64 insn) { | ||
| 45 | ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Shader::Maxwell | ||
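ICMP reduces to a compare-against-zero followed by a select between the two sources: if the third operand passes the comparison, the register source is kept, otherwise the variable operand is. A one-comparison sketch (only GreaterThan shown; the helper is hypothetical):

    #include <cstdint>

    constexpr uint32_t IcmpGt(uint32_t src_reg, uint32_t src_a, int32_t operand) {
        return operand > 0 ? src_reg : src_a; // CompareOp::GreaterThan against zero
    }

    static_assert(IcmpGt(111, 222, 5) == 111);
    static_assert(IcmpGt(111, 222, -5) == 222);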
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 19 | union { | ||
| 20 | u64 insn; | ||
| 21 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 22 | BitField<8, 8, IR::Reg> src_reg; | ||
| 23 | BitField<39, 3, IR::Pred> pred; | ||
| 24 | BitField<42, 1, u64> neg_pred; | ||
| 25 | BitField<43, 1, u64> x; | ||
| 26 | BitField<44, 1, u64> bf; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<47, 1, u64> cc; | ||
| 29 | BitField<48, 1, u64> is_signed; | ||
| 30 | BitField<49, 3, CompareOp> compare_op; | ||
| 31 | } const iset{insn}; | ||
| 32 | |||
| 33 | const IR::U32 src_a{v.X(iset.src_reg)}; | ||
| 34 | const bool is_signed{iset.is_signed != 0}; | ||
| 35 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 36 | const bool x{iset.x != 0}; | ||
| 37 | const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; | ||
| 38 | |||
| 39 | IR::U1 pred{v.ir.GetPred(iset.pred)}; | ||
| 40 | if (iset.neg_pred != 0) { | ||
| 41 | pred = v.ir.LogicalNot(pred); | ||
| 42 | } | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; | ||
| 48 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 49 | |||
| 50 | v.X(iset.dest_reg, result); | ||
| 51 | if (iset.cc != 0) { | ||
| 52 | if (x) { | ||
| 53 | throw NotImplementedException("ISET.CC + X"); | ||
| 54 | } | ||
| 55 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 56 | v.SetZFlag(is_zero); | ||
| 57 | if (iset.bf != 0) { | ||
| 58 | v.ResetSFlag(); | ||
| 59 | } else { | ||
| 60 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 61 | } | ||
| 62 | v.ResetCFlag(); | ||
| 63 | v.ResetOFlag(); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | } // Anonymous namespace | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISET_reg(u64 insn) { | ||
| 69 | ISET(*this, insn, GetReg20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISET_cbuf(u64 insn) { | ||
| 73 | ISET(*this, insn, GetCbuf(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISET_imm(u64 insn) { | ||
| 77 | ISET(*this, insn, GetImm20(insn)); | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace Shader::Maxwell | ||
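The bf bit above selects the value written on a passing comparison: an all-ones integer mask by default, or the IEEE-754 bit pattern of 1.0f (0x3f800000) in boolean-float mode, which lets the result feed float pipelines directly. A sketch with a hypothetical helper name:

    #include <cstdint>

    constexpr uint32_t IsetPassValue(bool bf) {
        return bf ? 0x3f800000u : 0xffffffffu; // 1.0f bits or full mask
    }

    static_assert(IsetPassValue(false) == static_cast<uint32_t>(-1));
    static_assert(IsetPassValue(true) == 0x3f800000u);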
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class FloatFormat : u64 { | ||
| 13 | F16 = 1, | ||
| 14 | F32 = 2, | ||
| 15 | F64 = 3, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class IntFormat : u64 { | ||
| 19 | U8 = 0, | ||
| 20 | U16 = 1, | ||
| 21 | U32 = 2, | ||
| 22 | U64 = 3, | ||
| 23 | }; | ||
| 24 | |||
| 25 | union Encoding { | ||
| 26 | u64 raw; | ||
| 27 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 28 | BitField<8, 2, FloatFormat> float_format; | ||
| 29 | BitField<10, 2, IntFormat> int_format; | ||
| 30 | BitField<13, 1, u64> is_signed; | ||
| 31 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 32 | BitField<41, 2, u64> selector; | ||
| 33 | BitField<47, 1, u64> cc; | ||
| 34 | BitField<45, 1, u64> neg; | ||
| 35 | BitField<49, 1, u64> abs; | ||
| 36 | }; | ||
| 37 | |||
| 38 | bool Is64(u64 insn) { | ||
| 39 | return Encoding{insn}.int_format == IntFormat::U64; | ||
| 40 | } | ||
| 41 | |||
| 42 | int BitSize(FloatFormat format) { | ||
| 43 | switch (format) { | ||
| 44 | case FloatFormat::F16: | ||
| 45 | return 16; | ||
| 46 | case FloatFormat::F32: | ||
| 47 | return 32; | ||
| 48 | case FloatFormat::F64: | ||
| 49 | return 64; | ||
| 50 | } | ||
| 51 | throw NotImplementedException("Invalid float format {}", format); | ||
| 52 | } | ||
| 53 | |||
| 54 | IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { | ||
| 55 | const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; | ||
| 56 | const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; | ||
| 57 | const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; | ||
| 58 | const IR::U1 is_least{v.ir.IEqual(value, least_value)}; | ||
| 59 | return IR::U32{v.ir.Select(is_least, value, absolute)}; | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { | ||
| 63 | const Encoding i2f{insn}; | ||
| 64 | if (i2f.cc != 0) { | ||
| 65 | throw NotImplementedException("I2F CC"); | ||
| 66 | } | ||
| 67 | const bool is_signed{i2f.is_signed != 0}; | ||
| 68 | int src_bitsize{}; | ||
| 69 | switch (i2f.int_format) { | ||
| 70 | case IntFormat::U8: | ||
| 71 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 72 | v.ir.Imm32(8), is_signed); | ||
| 73 | if (i2f.abs != 0) { | ||
| 74 | src = SmallAbs(v, src, 8); | ||
| 75 | } | ||
| 76 | src_bitsize = 8; | ||
| 77 | break; | ||
| 78 | case IntFormat::U16: | ||
| 79 | if (i2f.selector == 1 || i2f.selector == 3) { | ||
| 80 | throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); | ||
| 81 | } | ||
| 82 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 83 | v.ir.Imm32(16), is_signed); | ||
| 84 | if (i2f.abs != 0) { | ||
| 85 | src = SmallAbs(v, src, 16); | ||
| 86 | } | ||
| 87 | src_bitsize = 16; | ||
| 88 | break; | ||
| 89 | case IntFormat::U32: | ||
| 90 | case IntFormat::U64: | ||
| 91 | if (i2f.selector != 0) { | ||
| 92 | throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); | ||
| 93 | } | ||
| 94 | if (i2f.abs != 0 && is_signed) { | ||
| 95 | src = v.ir.IAbs(src); | ||
| 96 | } | ||
| 97 | src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; | ||
| 101 | const int dst_bitsize{BitSize(i2f.float_format)}; | ||
| 102 | const IR::FpControl fp_control{ | ||
| 103 | .no_contraction = false, | ||
| 104 | .rounding = CastFpRounding(i2f.fp_rounding), | ||
| 105 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 106 | }; | ||
| 107 | auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), | ||
| 108 | static_cast<size_t>(conversion_src_bitsize), is_signed, src, | ||
| 109 | fp_control)}; | ||
| 110 | if (i2f.neg != 0) { | ||
| 111 | if (i2f.abs != 0 || !is_signed) { | ||
| 112 | // We know the value is positive | ||
| 113 | value = v.ir.FPNeg(value); | ||
| 114 | } else { | ||
| 115 | // Only negate if the input isn't the lowest value | ||
| 116 | IR::U1 is_least; | ||
| 117 | if (src_bitsize == 64) { | ||
| 118 | is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); | ||
| 119 | } else if (src_bitsize == 32) { | ||
| 120 | is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); | ||
| 121 | } else { | ||
| 122 | const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; | ||
| 123 | is_least = v.ir.IEqual(src, least_value); | ||
| 124 | } | ||
| 125 | value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | switch (i2f.float_format) { | ||
| 129 | case FloatFormat::F16: { | ||
| 130 | const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 131 | v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case FloatFormat::F32: | ||
| 135 | v.F(i2f.dest_reg, value); | ||
| 136 | break; | ||
| 137 | case FloatFormat::F64: { | ||
| 138 | if (!IR::IsAligned(i2f.dest_reg, 2)) { | ||
| 139 | throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); | ||
| 140 | } | ||
| 141 | const IR::Value vector{v.ir.UnpackDouble2x32(value)}; | ||
| 142 | for (int i = 0; i < 2; ++i) { | ||
| 143 | v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 144 | } | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } // Anonymous namespace | ||
| 152 | |||
| 153 | void TranslatorVisitor::I2F_reg(u64 insn) { | ||
| 154 | if (Is64(insn)) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<20, 8, IR::Reg> reg; | ||
| 158 | } const value{insn}; | ||
| 159 | const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; | ||
| 160 | I2F(*this, insn, ir.PackUint2x32(regs)); | ||
| 161 | } else { | ||
| 162 | I2F(*this, insn, GetReg20(insn)); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | void TranslatorVisitor::I2F_cbuf(u64 insn) { | ||
| 167 | if (Is64(insn)) { | ||
| 168 | I2F(*this, insn, GetPackedCbuf(insn)); | ||
| 169 | } else { | ||
| 170 | I2F(*this, insn, GetCbuf(insn)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void TranslatorVisitor::I2F_imm(u64 insn) { | ||
| 175 | if (Is64(insn)) { | ||
| 176 | I2F(*this, insn, GetPackedImm20(insn)); | ||
| 177 | } else { | ||
| 178 | I2F(*this, insn, GetImm20(insn)); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
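SmallAbs above is the classic branchless absolute value (v + mask) ^ mask with mask = v >> (bitsize - 1), plus one twist: the most negative value of the narrow type is passed through unchanged instead of overflowing. A scalar model of the same arithmetic, assuming arithmetic right shift of negative values (guaranteed only since C++20):

    #include <cstdint>

    constexpr int32_t SmallAbsModel(int32_t value, int bitsize) {
        const int32_t least = -(1 << (bitsize - 1));
        const int32_t mask = value >> (bitsize - 1); // 0 or -1 for sign-extended input
        const int32_t absolute = (value + mask) ^ mask;
        return value == least ? value : absolute;
    }

    static_assert(SmallAbsModel(-5, 8) == 5);
    static_assert(SmallAbsModel(5, 8) == 5);
    static_assert(SmallAbsModel(-128, 8) == -128); // least value is preserved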
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class MaxShift : u64 { | ||
| 12 | U32, | ||
| 13 | Undefined, | ||
| 14 | U64, | ||
| 15 | S64, | ||
| 16 | }; | ||
| 17 | |||
| 18 | IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, | ||
| 19 | bool right_shift, bool is_signed) { | ||
| 20 | if (!right_shift) { | ||
| 21 | return ir.ShiftLeftLogical(packed_int, safe_shift); | ||
| 22 | } | ||
| 23 | if (is_signed) { | ||
| 24 | return ir.ShiftRightArithmetic(packed_int, safe_shift); | ||
| 25 | } | ||
| 26 | return ir.ShiftRightLogical(packed_int, safe_shift); | ||
| 27 | } | ||
| 28 | |||
| 29 | void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, | ||
| 30 | bool right_shift) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<0, 8, IR::Reg> lo_bits_reg; | ||
| 35 | BitField<37, 2, MaxShift> max_shift; | ||
| 36 | BitField<47, 1, u64> cc; | ||
| 37 | BitField<48, 2, u64> x_mode; | ||
| 38 | BitField<50, 1, u64> wrap; | ||
| 39 | } const shf{insn}; | ||
| 40 | |||
| 41 | if (shf.cc != 0) { | ||
| 42 | throw NotImplementedException("SHF CC"); | ||
| 43 | } | ||
| 44 | if (shf.x_mode != 0) { | ||
| 45 | throw NotImplementedException("SHF X Mode"); | ||
| 46 | } | ||
| 47 | if (shf.max_shift == MaxShift::Undefined) { | ||
| 48 | throw NotImplementedException("SHF Use of undefined MaxShift value"); | ||
| 49 | } | ||
| 50 | const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; | ||
| 51 | const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; | ||
| 52 | const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; | ||
| 53 | const IR::U32 safe_shift{shf.wrap != 0 | ||
| 54 | ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) | ||
| 55 | : v.ir.UMin(shift, max_shift)}; | ||
| 56 | |||
| 57 | const bool is_signed{shf.max_shift == MaxShift::S64}; | ||
| 58 | const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; | ||
| 59 | const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; | ||
| 60 | |||
| 61 | const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; | ||
| 62 | v.X(shf.dest_reg, result); | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::SHF_l_reg(u64 insn) { | ||
| 67 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::SHF_l_imm(u64 insn) { | ||
| 71 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::SHF_r_reg(u64 insn) { | ||
| 75 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); | ||
| 76 | } | ||
| 77 | |||
| 78 | void TranslatorVisitor::SHF_r_imm(u64 insn) { | ||
| 79 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
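On the CPU, the packed funnel shift above reduces to plain 64-bit arithmetic; a standalone model (function names are illustrative), assuming safe_shift has already been wrapped or clamped below 64 as SHF does:

    #include <cstdint>

    // Funnel-shift model: pack {lo, hi}, shift the 64-bit pair, then keep the
    // low word for right shifts or the high word for left shifts, matching the
    // CompositeExtract(unpacked_value, right_shift ? 0 : 1) selection above.
    uint32_t FunnelShiftRight(uint32_t lo, uint32_t hi, uint32_t safe_shift) {
        const uint64_t packed = (static_cast<uint64_t>(hi) << 32) | lo;
        return static_cast<uint32_t>(packed >> safe_shift);
    }

    uint32_t FunnelShiftLeft(uint32_t lo, uint32_t hi, uint32_t safe_shift) {
        const uint64_t packed = (static_cast<uint64_t>(hi) << 32) | lo;
        return static_cast<uint32_t>((packed << safe_shift) >> 32);
    }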
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 2, u64> mode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const imnmx{insn}; | ||
| 22 | |||
| 23 | if (imnmx.cc != 0) { | ||
| 24 | throw NotImplementedException("IMNMX CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | if (imnmx.mode != 0) { | ||
| 28 | throw NotImplementedException("IMNMX.MODE"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; | ||
| 32 | const IR::U32 op_a{v.X(imnmx.src_reg)}; | ||
| 33 | IR::U32 min; | ||
| 34 | IR::U32 max; | ||
| 35 | |||
| 36 | if (imnmx.is_signed != 0) { | ||
| 37 | min = IR::U32{v.ir.SMin(op_a, op_b)}; | ||
| 38 | max = IR::U32{v.ir.SMax(op_a, op_b)}; | ||
| 39 | } else { | ||
| 40 | min = IR::U32{v.ir.UMin(op_a, op_b)}; | ||
| 41 | max = IR::U32{v.ir.UMax(op_a, op_b)}; | ||
| 42 | } | ||
| 43 | if (imnmx.neg_pred != 0) { | ||
| 44 | std::swap(min, max); | ||
| 45 | } | ||
| 46 | |||
| 47 | const IR::U32 result{v.ir.Select(pred, min, max)}; | ||
| 48 | v.X(imnmx.dest_reg, result); | ||
| 49 | } | ||
| 50 | } // Anonymous namespace | ||
| 51 | |||
| 52 | void TranslatorVisitor::IMNMX_reg(u64 insn) { | ||
| 53 | IMNMX(*this, insn, GetReg20(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::IMNMX_cbuf(u64 insn) { | ||
| 57 | IMNMX(*this, insn, GetCbuf(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::IMNMX_imm(u64 insn) { | ||
| 61 | IMNMX(*this, insn, GetImm20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
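The predicate select above has a compact scalar meaning; a signed-mode sketch (illustrative, omitting the neg_pred case, which simply swaps the two choices):

    #include <algorithm>
    #include <cstdint>

    // IMNMX model: the predicate picks the minimum, otherwise the maximum.
    int32_t Imnmx(bool pred, int32_t op_a, int32_t op_b) {
        return pred ? std::min(op_a, op_b) : std::max(op_a, op_b);
    }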
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | } const popc{insn}; | ||
| 17 | |||
| 18 | const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); | ||
| 19 | const IR::U32 result = v.ir.BitCount(operand); | ||
| 20 | v.X(popc.dest_reg, result); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void TranslatorVisitor::POPC_reg(u64 insn) { | ||
| 25 | POPC(*this, insn, GetReg20(insn)); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::POPC_cbuf(u64 insn) { | ||
| 29 | POPC(*this, insn, GetCbuf(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::POPC_imm(u64 insn) { | ||
| 33 | POPC(*this, insn, GetImm20(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
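POPC maps directly onto std::popcount on the CPU; a one-line model of the tilde modifier (function name is illustrative):

    #include <bit>
    #include <cstdint>

    // Count the set bits of the source, or of its complement when ~ is encoded.
    uint32_t Popc(uint32_t src, bool tilde) {
        return static_cast<uint32_t>(std::popcount(tilde ? ~src : src));
    }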
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, | ||
| 12 | u64 scale_imm) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> op_a; | ||
| 17 | } const iscadd{insn}; | ||
| 18 | |||
| 19 | const bool po{neg_a && neg_b}; | ||
| 20 | IR::U32 op_a{v.X(iscadd.op_a)}; | ||
| 21 | if (po) { | ||
| 22 | // When PO is present, add one | ||
| 23 | op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); | ||
| 24 | } else { | ||
| 25 | // When PO is not present, the bits are interpreted as negation | ||
| 26 | if (neg_a) { | ||
| 27 | op_a = v.ir.INeg(op_a); | ||
| 28 | } | ||
| 29 | if (neg_b) { | ||
| 30 | op_b = v.ir.INeg(op_b); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | // With the operands already processed, scale A | ||
| 34 | const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; | ||
| 35 | const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; | ||
| 36 | |||
| 37 | const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; | ||
| 38 | v.X(iscadd.dest_reg, result); | ||
| 39 | |||
| 40 | if (cc) { | ||
| 41 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 42 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 43 | const IR::U1 carry{v.ir.GetCarryFromOp(result)}; | ||
| 44 | const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; | ||
| 45 | v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); | ||
| 46 | v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<39, 5, u64> scale; | ||
| 54 | BitField<47, 1, u64> cc; | ||
| 55 | BitField<48, 1, u64> neg_b; | ||
| 56 | BitField<49, 1, u64> neg_a; | ||
| 57 | } const iscadd{insn}; | ||
| 58 | |||
| 59 | ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||
| 65 | ISCADD(*this, insn, GetReg20(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||
| 69 | ISCADD(*this, insn, GetCbuf(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||
| 73 | ISCADD(*this, insn, GetImm20(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISCADD32I(u64 insn) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<52, 1, u64> cc; | ||
| 80 | BitField<53, 5, u64> scale; | ||
| 81 | } const iscadd{insn}; | ||
| 82 | |||
| 83 | ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace Shader::Maxwell | ||
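A scalar model of the operand processing above, ignoring the condition-code path (names are illustrative; scale is at most 31, as its 5-bit field allows):

    #include <cstdint>

    // ISCADD model: PO (both negate bits set) adds one instead of negating;
    // otherwise each negate bit negates its operand. A is scaled last.
    uint32_t Iscadd(uint32_t op_a, uint32_t op_b, unsigned scale, bool neg_a, bool neg_b) {
        const bool po = neg_a && neg_b;
        if (po) {
            op_b += 1;
        } else {
            if (neg_a) {
                op_a = 0u - op_a;
            }
            if (neg_b) {
                op_b = 0u - op_b;
            }
        }
        return (op_a << scale) + op_b;
    }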
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 19 | union { | ||
| 20 | u64 raw; | ||
| 21 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 22 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 23 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 24 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 25 | BitField<42, 1, u64> neg_bop_pred; | ||
| 26 | BitField<43, 1, u64> x; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<48, 1, u64> is_signed; | ||
| 29 | BitField<49, 3, CompareOp> compare_op; | ||
| 30 | } const isetp{insn}; | ||
| 31 | |||
| 32 | const bool is_signed{isetp.is_signed != 0}; | ||
| 33 | const bool x{isetp.x != 0}; | ||
| 34 | const BooleanOp bop{isetp.bop}; | ||
| 35 | const CompareOp compare_op{isetp.compare_op}; | ||
| 36 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 37 | const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; | ||
| 38 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 39 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 40 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 41 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 42 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 47 | ISETP(*this, insn, GetReg20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 51 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::ISETP_imm(u64 insn) { | ||
| 55 | ISETP(*this, insn, GetImm20(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
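The two destination predicates above are the boolean combination of the comparison, and of its negation, with the same source predicate; a standalone model (the enum mirrors the BooleanOp consumed by PredicateCombine, redeclared here so the sketch compiles on its own):

    enum class BooleanOp { AND, OR, XOR };

    // PredicateCombine model: fold the comparison into the source predicate.
    bool Combine(bool comparison, bool bop_pred, BooleanOp bop) {
        switch (bop) {
        case BooleanOp::AND: return comparison && bop_pred;
        case BooleanOp::OR:  return comparison || bop_pred;
        case BooleanOp::XOR: return comparison != bop_pred;
        }
        return false;
    }
    // dest_pred_a = Combine(cmp, p, bop); dest_pred_b = Combine(!cmp, p, bop);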
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped. | ||
| 31 | // To emulate this we just have to wrap it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we have to account for the special case of a shift by 32, which evaluates to 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And on the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64 insn) { | ||
| 60 | SHL(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64 insn) { | ||
| 64 | SHL(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
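Because out-of-range shifts are undefined behavior in C++ (not merely undefined results, as in SPIR-V and GLASM), a CPU reference for both SHL modes has to branch explicitly (sketch; names are illustrative):

    #include <cstdint>

    uint32_t Shl(uint32_t base, uint32_t shift, bool wrap) {
        if (wrap) {
            return base << (shift & 31); // .W: wrap the shift amount
        }
        return shift < 32 ? base << shift : 0; // clamped: 32 or more yields 0
    }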
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> is_wrapped; | ||
| 17 | BitField<40, 1, u64> brev; | ||
| 18 | BitField<43, 1, u64> xmode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const shr{insn}; | ||
| 22 | |||
| 23 | if (shr.xmode != 0) { | ||
| 24 | throw NotImplementedException("SHR.XMODE"); | ||
| 25 | } | ||
| 26 | if (shr.cc != 0) { | ||
| 27 | throw NotImplementedException("SHR.CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | IR::U32 base{v.X(shr.src_reg_a)}; | ||
| 31 | if (shr.brev == 1) { | ||
| 32 | base = v.ir.BitReverse(base); | ||
| 33 | } | ||
| 34 | IR::U32 result; | ||
| 35 | const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); | ||
| 36 | if (shr.is_signed == 1) { | ||
| 37 | result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; | ||
| 38 | } else { | ||
| 39 | result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; | ||
| 40 | } | ||
| 41 | |||
| 42 | if (shr.is_wrapped == 0) { | ||
| 43 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 44 | const IR::U32 safe_bits{v.ir.Imm32(32)}; | ||
| 45 | |||
| 46 | const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; | ||
| 47 | const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; | ||
| 48 | const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 49 | result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; | ||
| 50 | } | ||
| 51 | v.X(shr.dest_reg, result); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::SHR_reg(u64 insn) { | ||
| 56 | SHR(*this, insn, GetReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHR_cbuf(u64 insn) { | ||
| 60 | SHR(*this, insn, GetCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHR_imm(u64 insn) { | ||
| 64 | SHR(*this, insn, GetImm20(insn)); | ||
| 65 | } | ||
| 66 | } // namespace Shader::Maxwell | ||
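The matching CPU reference for SHR without .W, where oversized shifts fill with the sign bit in signed mode and with zeros otherwise (sketch; names are illustrative):

    #include <cstdint>

    uint32_t Shr(uint32_t base, uint32_t shift, bool is_signed) {
        if (shift >= 32) {
            return (is_signed && (base >> 31) != 0) ? 0xffffffffu : 0u;
        }
        return is_signed ? static_cast<uint32_t>(static_cast<int32_t>(base) >> shift)
                         : base >> shift;
    }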
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product left 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts bits [15:0] of src_b into bits [31:16] of the result. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64 insn) { | ||
| 82 | union { | ||
| 83 | u64 raw; | ||
| 84 | BitField<35, 1, Half> half_b; | ||
| 85 | BitField<36, 1, u64> psl; | ||
| 86 | BitField<37, 1, u64> mrg; | ||
| 87 | BitField<38, 1, u64> x; | ||
| 88 | BitField<50, 3, SelectMode> select_mode; | ||
| 89 | } const xmad{insn}; | ||
| 90 | |||
| 91 | XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 92 | xmad.mrg != 0, xmad.x != 0); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::XMAD_rc(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<50, 2, SelectMode> select_mode; | ||
| 99 | BitField<52, 1, Half> half_b; | ||
| 100 | BitField<54, 1, u64> x; | ||
| 101 | } const xmad{insn}; | ||
| 102 | |||
| 103 | XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, | ||
| 104 | xmad.x != 0); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::XMAD_cr(u64 insn) { | ||
| 108 | union { | ||
| 109 | u64 raw; | ||
| 110 | BitField<50, 2, SelectMode> select_mode; | ||
| 111 | BitField<52, 1, Half> half_b; | ||
| 112 | BitField<54, 1, u64> x; | ||
| 113 | BitField<55, 1, u64> psl; | ||
| 114 | BitField<56, 1, u64> mrg; | ||
| 115 | } const xmad{insn}; | ||
| 116 | |||
| 117 | XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 118 | xmad.mrg != 0, xmad.x != 0); | ||
| 119 | } | ||
| 120 | |||
| 121 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 122 | union { | ||
| 123 | u64 raw; | ||
| 124 | BitField<20, 16, u64> src_b; | ||
| 125 | BitField<36, 1, u64> psl; | ||
| 126 | BitField<37, 1, u64> mrg; | ||
| 127 | BitField<38, 1, u64> x; | ||
| 128 | BitField<50, 3, SelectMode> select_mode; | ||
| 129 | } const xmad{insn}; | ||
| 130 | |||
| 131 | XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, | ||
| 132 | Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Maxwell | ||
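XMAD exists to build wider multiplies out of 16x16->32 steps; one step of the data path above, modeled on the CPU (sketch; half selection, .MRG, and the op_c select modes are omitted):

    #include <cstdint>

    // Multiply two 16-bit halves, optionally shift the product left 16 bits
    // (.PSL), then accumulate the third operand.
    uint32_t XmadStep(uint32_t half_a, uint32_t half_b, uint32_t op_c, bool psl) {
        uint32_t product = (half_a & 0xffff) * (half_b & 0xffff);
        if (psl) {
            product <<= 16;
        }
        return product + op_c;
    }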
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class IntegerWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Short, | ||
| 14 | Word, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { | ||
| 18 | switch (width) { | ||
| 19 | case IntegerWidth::Byte: | ||
| 20 | return ir.Imm32(8); | ||
| 21 | case IntegerWidth::Short: | ||
| 22 | return ir.Imm32(16); | ||
| 23 | case IntegerWidth::Word: | ||
| 24 | return ir.Imm32(32); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid width {}", width); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, | ||
| 31 | IntegerWidth dst_width) { | ||
| 32 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 33 | const IR::U32 count{WidthSize(ir, dst_width)}; | ||
| 34 | return ir.BitFieldExtract(src, zero, count, false); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, | ||
| 38 | bool dst_signed, bool src_signed) { | ||
| 39 | IR::U32 min{}; | ||
| 40 | IR::U32 max{}; | ||
| 41 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 42 | switch (dst_width) { | ||
| 43 | case IntegerWidth::Byte: | ||
| 44 | min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; | ||
| 45 | max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); | ||
| 46 | break; | ||
| 47 | case IntegerWidth::Short: | ||
| 48 | min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; | ||
| 49 | max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); | ||
| 50 | break; | ||
| 51 | case IntegerWidth::Word: | ||
| 52 | min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; | ||
| 53 | max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid width {}", dst_width); | ||
| 57 | } | ||
| 58 | const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; | ||
| 59 | return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { | ||
| 63 | union { | ||
| 64 | u64 insn; | ||
| 65 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 66 | BitField<8, 2, IntegerWidth> dst_fmt; | ||
| 67 | BitField<12, 1, u64> dst_fmt_sign; | ||
| 68 | BitField<10, 2, IntegerWidth> src_fmt; | ||
| 69 | BitField<13, 1, u64> src_fmt_sign; | ||
| 70 | BitField<41, 3, u64> selector; | ||
| 71 | BitField<45, 1, u64> neg; | ||
| 72 | BitField<47, 1, u64> cc; | ||
| 73 | BitField<49, 1, u64> abs; | ||
| 74 | BitField<50, 1, u64> sat; | ||
| 75 | } const i2i{insn}; | ||
| 76 | |||
| 77 | if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { | ||
| 78 | throw NotImplementedException("16-bit source format incompatible with selector {}", | ||
| 79 | i2i.selector); | ||
| 80 | } | ||
| 81 | if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { | ||
| 82 | throw NotImplementedException("32-bit source format incompatible with selector {}", | ||
| 83 | i2i.selector); | ||
| 84 | } | ||
| 85 | |||
| 86 | const s32 selector{static_cast<s32>(i2i.selector)}; | ||
| 87 | const IR::U32 offset{v.ir.Imm32(selector * 8)}; | ||
| 88 | const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; | ||
| 89 | const bool src_signed{i2i.src_fmt_sign != 0}; | ||
| 90 | const bool dst_signed{i2i.dst_fmt_sign != 0}; | ||
| 91 | const bool sat{i2i.sat != 0}; | ||
| 92 | |||
| 93 | IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; | ||
| 94 | if (i2i.abs != 0) { | ||
| 95 | src_values = v.ir.IAbs(src_values); | ||
| 96 | } | ||
| 97 | if (i2i.neg != 0) { | ||
| 98 | src_values = v.ir.INeg(src_values); | ||
| 99 | } | ||
| 100 | const IR::U32 result{ | ||
| 101 | sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) | ||
| 102 | : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; | ||
| 103 | |||
| 104 | v.X(i2i.dest_reg, result); | ||
| 105 | if (i2i.cc != 0) { | ||
| 106 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 107 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 108 | v.ResetCFlag(); | ||
| 109 | v.ResetOFlag(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } // Anonymous namespace | ||
| 113 | |||
| 114 | void TranslatorVisitor::I2I_reg(u64 insn) { | ||
| 115 | I2I(*this, insn, GetReg20(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::I2I_cbuf(u64 insn) { | ||
| 119 | I2I(*this, insn, GetCbuf(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | void TranslatorVisitor::I2I_imm(u64 insn) { | ||
| 123 | I2I(*this, insn, GetImm20(insn)); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace Shader::Maxwell | ||
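The saturating bounds in SaturateInteger are the usual two's-complement limits; the signed-to-signed byte case, checked on the CPU (sketch):

    #include <algorithm>
    #include <cstdint>

    // 0xffffff80 and 0x7f above are -128 and 127 as 32-bit values: clamp,
    // and the destination then keeps only the low byte.
    int32_t SaturateToS8(int32_t value) {
        return std::clamp(value, -128, 127);
    }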
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | Patch, | ||
| 14 | Prim, | ||
| 15 | Attr, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class Shift : u64 { | ||
| 19 | Default, | ||
| 20 | U16, | ||
| 21 | B32, | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::ISBERD(u64 insn) { | ||
| 27 | union { | ||
| 28 | u64 raw; | ||
| 29 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 30 | BitField<8, 8, IR::Reg> src_reg; | ||
| 31 | BitField<31, 1, u64> skew; | ||
| 32 | BitField<32, 1, u64> o; | ||
| 33 | BitField<33, 2, Mode> mode; | ||
| 34 | BitField<47, 2, Shift> shift; | ||
| 35 | } const isberd{insn}; | ||
| 36 | |||
| 37 | if (isberd.skew != 0) { | ||
| 38 | throw NotImplementedException("SKEW"); | ||
| 39 | } | ||
| 40 | if (isberd.o != 0) { | ||
| 41 | throw NotImplementedException("O"); | ||
| 42 | } | ||
| 43 | if (isberd.mode != Mode::Default) { | ||
| 44 | throw NotImplementedException("Mode {}", isberd.mode.Value()); | ||
| 45 | } | ||
| 46 | if (isberd.shift != Shift::Default) { | ||
| 47 | throw NotImplementedException("Shift {}", isberd.shift.Value()); | ||
| 48 | } | ||
| 49 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 50 | X(isberd.dest_reg, X(isberd.src_reg)); | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | using namespace LDC; | ||
| 12 | namespace { | ||
| 13 | std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, | ||
| 14 | const IR::U32& reg, const IR::U32& imm) { | ||
| 15 | switch (mode) { | ||
| 16 | case Mode::Default: | ||
| 17 | return {imm_index, ir.IAdd(reg, imm)}; | ||
| 18 | default: | ||
| 19 | break; | ||
| 20 | } | ||
| 21 | throw NotImplementedException("Mode {}", mode); | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void TranslatorVisitor::LDC(u64 insn) { | ||
| 26 | const Encoding ldc{insn}; | ||
| 27 | const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; | ||
| 28 | const IR::U32 reg{X(ldc.src_reg)}; | ||
| 29 | const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; | ||
| 30 | const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; | ||
| 31 | switch (ldc.size) { | ||
| 32 | case Size::U8: | ||
| 33 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); | ||
| 34 | break; | ||
| 35 | case Size::S8: | ||
| 36 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); | ||
| 37 | break; | ||
| 38 | case Size::U16: | ||
| 39 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); | ||
| 40 | break; | ||
| 41 | case Size::S16: | ||
| 42 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); | ||
| 43 | break; | ||
| 44 | case Size::B32: | ||
| 45 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); | ||
| 46 | break; | ||
| 47 | case Size::B64: { | ||
| 48 | if (!IR::IsAligned(ldc.dest_reg, 2)) { | ||
| 49 | throw NotImplementedException("Unaligned destination register {}", ldc.dest_reg.Value()); | ||
| 50 | } | ||
| 51 | const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; | ||
| 52 | for (int i = 0; i < 2; ++i) { | ||
| 53 | X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | default: | ||
| 58 | throw NotImplementedException("Invalid size {}", ldc.size.Value()); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell::LDC { | ||
| 12 | |||
| 13 | enum class Mode : u64 { | ||
| 14 | Default, | ||
| 15 | IL, | ||
| 16 | IS, | ||
| 17 | ISL, | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class Size : u64 { | ||
| 21 | U8, | ||
| 22 | S8, | ||
| 23 | U16, | ||
| 24 | S16, | ||
| 25 | B32, | ||
| 26 | B64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | union Encoding { | ||
| 30 | u64 raw; | ||
| 31 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 32 | BitField<8, 8, IR::Reg> src_reg; | ||
| 33 | BitField<20, 16, s64> offset; | ||
| 34 | BitField<36, 5, u64> index; | ||
| 35 | BitField<44, 2, Mode> mode; | ||
| 36 | BitField<48, 3, Size> size; | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace Shader::Maxwell::LDC | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, | ||
| 12 | bool neg, bool x) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 3, IR::Pred> pred; | ||
| 19 | } const lea{insn}; | ||
| 20 | |||
| 21 | if (x) { | ||
| 22 | throw NotImplementedException("LEA.HI X"); | ||
| 23 | } | ||
| 24 | if (lea.pred != IR::Pred::PT) { | ||
| 25 | throw NotImplementedException("LEA.HI Pred"); | ||
| 26 | } | ||
| 27 | if (lea.cc != 0) { | ||
| 28 | throw NotImplementedException("LEA.HI CC"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 32 | const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; | ||
| 33 | const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; | ||
| 34 | |||
| 35 | const s32 hi_scale{32 - static_cast<s32>(scale)}; | ||
| 36 | const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; | ||
| 37 | const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; | ||
| 38 | |||
| 39 | IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; | ||
| 40 | v.X(lea.dest_reg, result); | ||
| 41 | } | ||
| 42 | |||
| 43 | void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { | ||
| 44 | union { | ||
| 45 | u64 insn; | ||
| 46 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 47 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 48 | BitField<39, 5, u64> scale; | ||
| 49 | BitField<45, 1, u64> neg; | ||
| 50 | BitField<46, 1, u64> x; | ||
| 51 | BitField<47, 1, u64> cc; | ||
| 52 | BitField<48, 3, IR::Pred> pred; | ||
| 53 | } const lea{insn}; | ||
| 54 | if (lea.x != 0) { | ||
| 55 | throw NotImplementedException("LEA.LO X"); | ||
| 56 | } | ||
| 57 | if (lea.pred != IR::Pred::PT) { | ||
| 58 | throw NotImplementedException("LEA.LO Pred"); | ||
| 59 | } | ||
| 60 | if (lea.cc != 0) { | ||
| 61 | throw NotImplementedException("LEA.LO CC"); | ||
| 62 | } | ||
| 63 | |||
| 64 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 65 | const s32 scale{static_cast<s32>(lea.scale)}; | ||
| 66 | const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; | ||
| 67 | const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; | ||
| 68 | |||
| 69 | IR::U32 result{v.ir.IAdd(base, scaled_offset)}; | ||
| 70 | v.X(lea.dest_reg, result); | ||
| 71 | } | ||
| 72 | } // Anonymous namespace | ||
| 73 | |||
| 74 | void TranslatorVisitor::LEA_hi_reg(u64 insn) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<28, 5, u64> scale; | ||
| 78 | BitField<37, 1, u64> neg; | ||
| 79 | BitField<38, 1, u64> x; | ||
| 80 | } const lea{insn}; | ||
| 81 | |||
| 82 | LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { | ||
| 86 | union { | ||
| 87 | u64 insn; | ||
| 88 | BitField<51, 5, u64> scale; | ||
| 89 | BitField<56, 1, u64> neg; | ||
| 90 | BitField<57, 1, u64> x; | ||
| 91 | } const lea{insn}; | ||
| 92 | |||
| 93 | LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::LEA_lo_reg(u64 insn) { | ||
| 97 | LEA_lo(*this, insn, GetReg20(insn)); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { | ||
| 101 | LEA_lo(*this, insn, GetCbuf(insn)); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::LEA_lo_imm(u64 insn) { | ||
| 105 | LEA_lo(*this, insn, GetImm20(insn)); | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
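LEA_lo and LEA_hi together compute base + (offset << scale) across a 32-bit register pair; the word-level arithmetic, modeled on the CPU (sketch; negation is omitted, and scale is at most 31 per its 5-bit field):

    #include <cstdint>

    // Low word: the plain scaled add.
    uint32_t LeaLo(uint32_t base_lo, uint32_t offset_lo, unsigned scale) {
        return base_lo + (offset_lo << scale);
    }

    // High word: add the bits the left shift pushes past bit 31, i.e. the
    // 64-bit offset shifted right by (32 - scale), as in LEA_hi above.
    uint32_t LeaHi(uint32_t base_hi, uint64_t offset, unsigned scale) {
        return base_hi + static_cast<uint32_t>(offset >> (32 - scale));
    }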
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | |||
| @@ -0,0 +1,196 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Size : u64 { | ||
| 15 | B32, | ||
| 16 | B64, | ||
| 17 | B96, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class InterpolationMode : u64 { | ||
| 22 | Pass, | ||
| 23 | Multiply, | ||
| 24 | Constant, | ||
| 25 | Sc, | ||
| 26 | }; | ||
| 27 | |||
| 28 | enum class SampleMode : u64 { | ||
| 29 | Default, | ||
| 30 | Centroid, | ||
| 31 | Offset, | ||
| 32 | }; | ||
| 33 | |||
| 34 | u32 NumElements(Size size) { | ||
| 35 | switch (size) { | ||
| 36 | case Size::B32: | ||
| 37 | return 1; | ||
| 38 | case Size::B64: | ||
| 39 | return 2; | ||
| 40 | case Size::B96: | ||
| 41 | return 3; | ||
| 42 | case Size::B128: | ||
| 43 | return 4; | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid size {}", size); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename F> | ||
| 49 | void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { | ||
| 50 | const IR::U32 index_value{v.X(index_reg)}; | ||
| 51 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 52 | const IR::U32 final_offset{ | ||
| 53 | element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; | ||
| 54 | f(element, final_offset); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::ALD(u64 insn) { | ||
| 61 | union { | ||
| 62 | u64 raw; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> index_reg; | ||
| 65 | BitField<20, 10, u64> absolute_offset; | ||
| 66 | BitField<20, 11, s64> relative_offset; | ||
| 67 | BitField<31, 1, u64> patch; | ||
| 68 | BitField<32, 1, u64> o; | ||
| 69 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 70 | BitField<47, 2, Size> size; | ||
| 71 | } const ald{insn}; | ||
| 72 | |||
| 73 | const u64 offset{ald.absolute_offset.Value()}; | ||
| 74 | if (offset % 4 != 0) { | ||
| 75 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 76 | } | ||
| 77 | const IR::U32 vertex{X(ald.vertex_reg)}; | ||
| 78 | const u32 num_elements{NumElements(ald.size)}; | ||
| 79 | if (ald.index_reg == IR::Reg::RZ) { | ||
| 80 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 81 | if (ald.patch != 0) { | ||
| 82 | const IR::Patch patch{offset / 4 + element}; | ||
| 83 | F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); | ||
| 84 | } else { | ||
| 85 | const IR::Attribute attr{offset / 4 + element}; | ||
| 86 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (ald.patch != 0) { | ||
| 92 | throw NotImplementedException("Indirect patch read"); | ||
| 93 | } | ||
| 94 | HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 95 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::AST(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 raw; | ||
| 102 | BitField<0, 8, IR::Reg> src_reg; | ||
| 103 | BitField<8, 8, IR::Reg> index_reg; | ||
| 104 | BitField<20, 10, u64> absolute_offset; | ||
| 105 | BitField<20, 11, s64> relative_offset; | ||
| 106 | BitField<31, 1, u64> patch; | ||
| 107 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 108 | BitField<47, 2, Size> size; | ||
| 109 | } const ast{insn}; | ||
| 110 | |||
| 114 | const u64 offset{ast.absolute_offset.Value()}; | ||
| 115 | if (offset % 4 != 0) { | ||
| 116 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 117 | } | ||
| 118 | const IR::U32 vertex{X(ast.vertex_reg)}; | ||
| 119 | const u32 num_elements{NumElements(ast.size)}; | ||
| 120 | if (ast.index_reg == IR::Reg::RZ) { | ||
| 121 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 122 | if (ast.patch != 0) { | ||
| 123 | const IR::Patch patch{offset / 4 + element}; | ||
| 124 | ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); | ||
| 125 | } else { | ||
| 126 | const IR::Attribute attr{offset / 4 + element}; | ||
| 127 | ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | if (ast.patch != 0) { | ||
| 133 | throw NotImplementedException("Indexed tessellation patch store"); | ||
| 134 | } | ||
| 135 | HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 136 | ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IPA(u64 insn) { | ||
| 141 | // IPA is the instruction used to read varyings from a fragment shader. | ||
| 142 | // gl_FragCoord is mapped to the gl_Position attribute. | ||
| 143 | // It yields unknown results when used outside of the fragment shader stage. | ||
| 144 | union { | ||
| 145 | u64 raw; | ||
| 146 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 147 | BitField<8, 8, IR::Reg> index_reg; | ||
| 148 | BitField<20, 8, IR::Reg> multiplier; | ||
| 149 | BitField<30, 8, IR::Attribute> attribute; | ||
| 150 | BitField<38, 1, u64> idx; | ||
| 151 | BitField<51, 1, u64> sat; | ||
| 152 | BitField<52, 2, SampleMode> sample_mode; | ||
| 153 | BitField<54, 2, InterpolationMode> interpolation_mode; | ||
| 154 | } const ipa{insn}; | ||
| 155 | |||
| 156 | // Indexed IPAs are used for indexed varyings. | ||
| 157 | // For example: | ||
| 158 | // | ||
| 159 | // in vec4 colors[4]; | ||
| 160 | // uniform int idx; | ||
| 161 | // void main() { | ||
| 162 | // gl_FragColor = colors[idx]; | ||
| 163 | // } | ||
| 164 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; | ||
| 165 | const IR::Attribute attribute{ipa.attribute}; | ||
| 166 | IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) | ||
| 167 | : ir.GetAttribute(attribute)}; | ||
| 168 | if (IR::IsGeneric(attribute)) { | ||
| 169 | const ProgramHeader& sph{env.SPH()}; | ||
| 170 | const u32 attr_index{IR::GenericAttributeIndex(attribute)}; | ||
| 171 | const u32 element{static_cast<u32>(attribute) % 4}; | ||
| 172 | const std::array input_map{sph.ps.GenericInputMap(attr_index)}; | ||
| 173 | const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; | ||
| 174 | if (is_perspective) { | ||
| 175 | const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; | ||
| 176 | value = ir.FPMul(value, position_w); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | if (ipa.interpolation_mode == InterpolationMode::Multiply) { | ||
| 180 | value = ir.FPMul(value, F(ipa.multiplier)); | ||
| 181 | } | ||
| 182 | |||
| 183 | // Saturated IPAs are generally generated from clamped varyings. | ||
| 184 | // For example: clamp(some_varying, 0.0, 1.0) | ||
| 185 | const bool is_saturated{ipa.sat != 0}; | ||
| 186 | if (is_saturated) { | ||
| 187 | if (attribute == IR::Attribute::FrontFace) { | ||
| 188 | throw NotImplementedException("IPA.SAT on FrontFace"); | ||
| 189 | } | ||
| 190 | value = ir.FPSaturate(value); | ||
| 191 | } | ||
| 192 | |||
| 193 | F(ipa.dest_reg, value); | ||
| 194 | } | ||
| 195 | |||
| 196 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Size : u64 { | ||
| 12 | U8, | ||
| 13 | S8, | ||
| 14 | U16, | ||
| 15 | S16, | ||
| 16 | B32, | ||
| 17 | B64, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { | ||
| 38 | const IR::U32 offset{Offset(v, insn)}; | ||
| 39 | if (offset.IsImmediate()) { | ||
| 40 | return {v.ir.Imm32(offset.U32() / 4), offset}; | ||
| 41 | } else { | ||
| 42 | return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<48, 3, Size> size; | ||
| 50 | } const encoding{insn}; | ||
| 51 | |||
| 52 | switch (encoding.size) { | ||
| 53 | case Size::U8: | ||
| 54 | return {8, false}; | ||
| 55 | case Size::S8: | ||
| 56 | return {8, true}; | ||
| 57 | case Size::U16: | ||
| 58 | return {16, false}; | ||
| 59 | case Size::S16: | ||
| 60 | return {16, true}; | ||
| 61 | case Size::B32: | ||
| 62 | return {32, false}; | ||
| 63 | case Size::B64: | ||
| 64 | return {64, false}; | ||
| 65 | case Size::B128: | ||
| 66 | return {128, false}; | ||
| 67 | default: | ||
| 68 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Reg Reg(u64 insn) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<0, 8, IR::Reg> reg; | ||
| 76 | } const encoding{insn}; | ||
| 77 | |||
| 78 | return encoding.reg; | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 82 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 86 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { | ||
| 90 | const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; | ||
| 91 | const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; | ||
| 92 | return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; | ||
| 93 | } | ||
| 94 | } // Anonymous namespace | ||
| 95 | |||
| 96 | void TranslatorVisitor::LDL(u64 insn) { | ||
| 97 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 98 | const IR::U32 word{LoadLocal(*this, word_offset, offset)}; | ||
| 99 | const IR::Reg dest{Reg(insn)}; | ||
| 100 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 101 | switch (bit_size) { | ||
| 102 | case 8: { | ||
| 103 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 104 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | case 16: { | ||
| 108 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 109 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case 32: | ||
| 113 | case 64: | ||
| 114 | case 128: | ||
| 115 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 116 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 117 | } | ||
| 118 | X(dest, word); | ||
| 119 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 120 | const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; | ||
| 121 | const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; | ||
| 122 | X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); | ||
| 123 | } | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::LDS(u64 insn) { | ||
| 129 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 130 | const IR::Reg dest{Reg(insn)}; | ||
| 131 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 132 | const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; | ||
| 133 | switch (bit_size) { | ||
| 134 | case 8: | ||
| 135 | case 16: | ||
| 136 | case 32: | ||
| 137 | X(dest, IR::U32{value}); | ||
| 138 | break; | ||
| 139 | case 64: | ||
| 140 | case 128: | ||
| 141 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 142 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 143 | } | ||
| 144 | for (int element = 0; element < bit_size / 32; ++element) { | ||
| 145 | X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); | ||
| 146 | } | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | void TranslatorVisitor::STL(u64 insn) { | ||
| 152 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 153 | if (offset.IsImmediate()) { | ||
| 154 | // TODO: Support storing out of bounds at runtime | ||
| 155 | if (offset.U32() >= env.LocalMemorySize()) { | ||
| 156 | LOG_WARNING(Shader, "Dropping out-of-bounds local memory store at 0x{:x} (size 0x{:x})", | ||
| 157 | offset.U32(), env.LocalMemorySize()); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | const IR::Reg reg{Reg(insn)}; | ||
| 162 | const IR::U32 src{X(reg)}; | ||
| 163 | const int bit_size{GetSize(insn).first}; | ||
| 164 | switch (bit_size) { | ||
| 165 | case 8: { | ||
| 166 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 167 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; | ||
| 168 | ir.WriteLocal(word_offset, value); | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case 16: { | ||
| 172 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 173 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; | ||
| 174 | ir.WriteLocal(word_offset, value); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | case 32: | ||
| 178 | case 64: | ||
| 179 | case 128: | ||
| 180 | if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { | ||
| 181 | throw NotImplementedException("Unaligned source register"); | ||
| 182 | } | ||
| 183 | ir.WriteLocal(word_offset, src); | ||
| 184 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 185 | ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void TranslatorVisitor::STS(u64 insn) { | ||
| 192 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 193 | const IR::Reg reg{Reg(insn)}; | ||
| 194 | const int bit_size{GetSize(insn).first}; | ||
| 195 | switch (bit_size) { | ||
| 196 | case 8: | ||
| 197 | case 16: | ||
| 198 | case 32: | ||
| 199 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 200 | break; | ||
| 201 | case 64: | ||
| 202 | if (!IR::IsAligned(reg, 2)) { | ||
| 203 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 204 | } | ||
| 205 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 206 | break; | ||
| 207 | case 128: { | ||
| 208 | if (!IR::IsAligned(reg, 2)) { | ||
| 209 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 210 | } | ||
| 211 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 212 | ir.WriteShared(128, offset, vector); | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Maxwell | ||
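The sub-word LDL/STL paths above convert a byte address into a bit position inside the loaded 32-bit word via ByteOffset and ShortOffset. A minimal standalone sketch of that arithmetic (illustrative C++, not emitter code):

    #include <cassert>
    #include <cstdint>

    // Bit position of a byte inside its 32-bit word: (offset * 8) masked to {0, 8, 16, 24}
    uint32_t ByteOffsetBits(uint32_t offset) {
        return (offset << 3) & 24;
    }

    // Bit position of a 16-bit half inside its word: either 0 or 16
    uint32_t ShortOffsetBits(uint32_t offset) {
        return (offset << 3) & 16;
    }

    int main() {
        assert(ByteOffsetBits(5) == 8);   // byte address 5 -> word 1, bit 8
        assert(ShortOffsetBits(6) == 16); // half address 6 -> word 1, bit 16
    }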
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? (translated identically to B128 below) | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class StoreSize : u64 { | ||
| 25 | U8, // Zero-extend | ||
| 26 | S8, // Sign-extend | ||
| 27 | U16, // Zero-extend | ||
| 28 | S16, // Sign-extend | ||
| 29 | B32, | ||
| 30 | B64, | ||
| 31 | B128, | ||
| 32 | }; | ||
| 33 | |||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache: consider cached system memory lines stale and fetch again | ||
| 40 | }; | ||
| 41 | |||
| 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 43 | enum class StoreCache : u64 { | ||
| 44 | WB, // Cache write-back all coherent levels | ||
| 45 | CG, // Cache at global level | ||
| 46 | CS, // Cache streaming, likely to be accessed once | ||
| 47 | WT, // Cache write-through (to system memory) | ||
| 48 | }; | ||
| 49 | |||
| 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 56 | BitField<45, 1, u64> e; | ||
| 57 | } const mem{insn}; | ||
| 58 | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 60 | if (mem.e == 0) { | ||
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | ||
| 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 63 | } | ||
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | ||
| 65 | throw NotImplementedException("Unaligned address register"); | ||
| 66 | } | ||
| 67 | // Pack two registers to build the 64-bit address | ||
| 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); | ||
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 | // When RZ is used, the address is an absolute address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 77 | }()}; | ||
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: | ||
| 122 | case LoadSize::U128: { | ||
| 123 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 124 | throw NotImplementedException("Unaligned data registers"); | ||
| 125 | } | ||
| 126 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 127 | for (int i = 0; i < 4; ++i) { | ||
| 128 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 129 | } | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | default: | ||
| 133 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | void TranslatorVisitor::STG(u64 insn) { | ||
| 138 | // STG stores registers into global memory. | ||
| 139 | union { | ||
| 140 | u64 raw; | ||
| 141 | BitField<0, 8, IR::Reg> data_reg; | ||
| 142 | BitField<46, 2, StoreCache> cache; | ||
| 143 | BitField<48, 3, StoreSize> size; | ||
| 144 | } const stg{insn}; | ||
| 145 | |||
| 146 | // Pointer to store data into | ||
| 147 | const IR::U64 address{Address(*this, insn)}; | ||
| 148 | const IR::Reg data_reg{stg.data_reg}; | ||
| 149 | switch (stg.size) { | ||
| 150 | case StoreSize::U8: | ||
| 151 | ir.WriteGlobalU8(address, X(data_reg)); | ||
| 152 | break; | ||
| 153 | case StoreSize::S8: | ||
| 154 | ir.WriteGlobalS8(address, X(data_reg)); | ||
| 155 | break; | ||
| 156 | case StoreSize::U16: | ||
| 157 | ir.WriteGlobalU16(address, X(data_reg)); | ||
| 158 | break; | ||
| 159 | case StoreSize::S16: | ||
| 160 | ir.WriteGlobalS16(address, X(data_reg)); | ||
| 161 | break; | ||
| 162 | case StoreSize::B32: | ||
| 163 | ir.WriteGlobal32(address, X(data_reg)); | ||
| 164 | break; | ||
| 165 | case StoreSize::B64: { | ||
| 166 | if (!IR::IsAligned(data_reg, 2)) { | ||
| 167 | throw NotImplementedException("Unaligned data registers"); | ||
| 168 | } | ||
| 169 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; | ||
| 170 | ir.WriteGlobal64(address, vector); | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | case StoreSize::B128: | ||
| 174 | if (!IR::IsAligned(data_reg, 4)) { | ||
| 175 | throw NotImplementedException("Unaligned data registers"); | ||
| 176 | } | ||
| 177 | const IR::Value vector{ | ||
| 178 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; | ||
| 179 | ir.WriteGlobal128(address, vector); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | } // namespace Shader::Maxwell | ||
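Address() above builds the effective global-memory pointer three different ways. A scalar sketch of the same logic (illustrative names; `base` is the zero-extended 32-bit register without .E, or the packed register pair with .E, and `imm24` is the raw 24-bit immediate field):

    #include <cstdint>

    uint64_t EffectiveAddress(uint64_t base, uint32_t imm24, bool base_is_rz) {
        if (base_is_rz) {
            // RZ base: the immediate is an absolute, zero-extended address
            return base + imm24;
        }
        // Otherwise the immediate is a signed displacement: sign-extend 24 bits
        const int32_t displacement = static_cast<int32_t>(imm24 << 8) >> 8;
        return base + static_cast<uint64_t>(static_cast<int64_t>(displacement));
    }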
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class LogicalOp : u64 { | ||
| 13 | AND, | ||
| 14 | OR, | ||
| 15 | XOR, | ||
| 16 | PASS_B, | ||
| 17 | }; | ||
| 18 | |||
| 19 | [[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 20 | const IR::U32& operand_2, LogicalOp op) { | ||
| 21 | switch (op) { | ||
| 22 | case LogicalOp::AND: | ||
| 23 | return ir.BitwiseAnd(operand_1, operand_2); | ||
| 24 | case LogicalOp::OR: | ||
| 25 | return ir.BitwiseOr(operand_1, operand_2); | ||
| 26 | case LogicalOp::XOR: | ||
| 27 | return ir.BitwiseXor(operand_1, operand_2); | ||
| 28 | case LogicalOp::PASS_B: | ||
| 29 | return operand_2; | ||
| 30 | default: | ||
| 31 | throw NotImplementedException("Invalid Logical operation {}", op); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, | ||
| 36 | LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, | ||
| 37 | IR::Pred dest_pred = IR::Pred::PT) { | ||
| 38 | union { | ||
| 39 | u64 insn; | ||
| 40 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 41 | BitField<8, 8, IR::Reg> src_reg; | ||
| 42 | } const lop{insn}; | ||
| 43 | |||
| 44 | if (x) { | ||
| 45 | throw NotImplementedException("X"); | ||
| 46 | } | ||
| 47 | IR::U32 op_a{v.X(lop.src_reg)}; | ||
| 48 | if (inv_a) { | ||
| 49 | op_a = v.ir.BitwiseNot(op_a); | ||
| 50 | } | ||
| 51 | if (inv_b) { | ||
| 52 | op_b = v.ir.BitwiseNot(op_b); | ||
| 53 | } | ||
| 54 | |||
| 55 | const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; | ||
| 56 | if (pred_op) { | ||
| 57 | const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; | ||
| 58 | v.ir.SetPred(dest_pred, pred_result); | ||
| 59 | } | ||
| 60 | if (cc) { | ||
| 61 | if (bit_op == LogicalOp::PASS_B) { | ||
| 62 | v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); | ||
| 63 | v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); | ||
| 64 | } else { | ||
| 65 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 66 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 67 | } | ||
| 68 | v.ResetCFlag(); | ||
| 69 | v.ResetOFlag(); | ||
| 70 | } | ||
| 71 | v.X(lop.dest_reg, result); | ||
| 72 | } | ||
| 73 | |||
| 74 | void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<39, 1, u64> inv_a; | ||
| 78 | BitField<40, 1, u64> inv_b; | ||
| 79 | BitField<41, 2, LogicalOp> bit_op; | ||
| 80 | BitField<43, 1, u64> x; | ||
| 81 | BitField<44, 2, PredicateOp> pred_op; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 3, IR::Pred> dest_pred; | ||
| 84 | } const lop{insn}; | ||
| 85 | |||
| 86 | LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, | ||
| 87 | lop.pred_op, lop.dest_pred); | ||
| 88 | } | ||
| 89 | } // Anonymous namespace | ||
| 90 | |||
| 91 | void TranslatorVisitor::LOP_reg(u64 insn) { | ||
| 92 | LOP(*this, insn, GetReg20(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::LOP_cbuf(u64 insn) { | ||
| 96 | LOP(*this, insn, GetCbuf(insn)); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::LOP_imm(u64 insn) { | ||
| 100 | LOP(*this, insn, GetImm20(insn)); | ||
| 101 | } | ||
| 102 | |||
| 103 | void TranslatorVisitor::LOP32I(u64 insn) { | ||
| 104 | union { | ||
| 105 | u64 raw; | ||
| 106 | BitField<53, 2, LogicalOp> bit_op; | ||
| 107 | BitField<57, 1, u64> x; | ||
| 108 | BitField<52, 1, u64> cc; | ||
| 109 | BitField<55, 1, u64> inv_a; | ||
| 110 | BitField<56, 1, u64> inv_b; | ||
| 111 | } const lop32i{insn}; | ||
| 112 | |||
| 113 | LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, | ||
| 114 | lop32i.inv_b != 0, lop32i.bit_op); | ||
| 115 | } | ||
| 116 | } // namespace Shader::Maxwell | ||
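The LOP datapath above applies the operand inversions before the two-input op, and PASS_B simply forwards operand B. A plain-integer model (hypothetical sketch, not emitter code):

    #include <cstdint>

    enum class LogicalOp { AND, OR, XOR, PASS_B };

    // Scalar model of LOP: optional inversions, then the bitwise op
    uint32_t Lop(uint32_t a, uint32_t b, LogicalOp op, bool inv_a, bool inv_b) {
        if (inv_a) {
            a = ~a;
        }
        if (inv_b) {
            b = ~b;
        }
        switch (op) {
        case LogicalOp::AND:
            return a & b;
        case LogicalOp::OR:
            return a | b;
        case LogicalOp::XOR:
            return a ^ b;
        case LogicalOp::PASS_B:
        default:
            return b;
        }
    }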
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 | ||
| 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) | ||
| 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | ||
| 15 | u64 ttbl) { | ||
| 16 | IR::U32 r{ir.Imm32(0)}; | ||
| 17 | const IR::U32 not_a{ir.BitwiseNot(a)}; | ||
| 18 | const IR::U32 not_b{ir.BitwiseNot(b)}; | ||
| 19 | const IR::U32 not_c{ir.BitwiseNot(c)}; | ||
| 20 | if (ttbl & 0x01) { | ||
| 21 | // r |= ~a & ~b & ~c; | ||
| 22 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 23 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 24 | r = ir.BitwiseOr(r, rhs); | ||
| 25 | } | ||
| 26 | if (ttbl & 0x02) { | ||
| 27 | // r |= ~a & ~b & c; | ||
| 28 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 29 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 30 | r = ir.BitwiseOr(r, rhs); | ||
| 31 | } | ||
| 32 | if (ttbl & 0x04) { | ||
| 33 | // r |= ~a & b & ~c; | ||
| 34 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 35 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 36 | r = ir.BitwiseOr(r, rhs); | ||
| 37 | } | ||
| 38 | if (ttbl & 0x08) { | ||
| 39 | // r |= ~a & b & c; | ||
| 40 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 41 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 42 | r = ir.BitwiseOr(r, rhs); | ||
| 43 | } | ||
| 44 | if (ttbl & 0x10) { | ||
| 45 | // r |= a & ~b & ~c; | ||
| 46 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 47 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 48 | r = ir.BitwiseOr(r, rhs); | ||
| 49 | } | ||
| 50 | if (ttbl & 0x20) { | ||
| 51 | // r |= a & ~b & c; | ||
| 52 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 53 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 54 | r = ir.BitwiseOr(r, rhs); | ||
| 55 | } | ||
| 56 | if (ttbl & 0x40) { | ||
| 57 | // r |= a & b & ~c; | ||
| 58 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 59 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 60 | r = ir.BitwiseOr(r, rhs); | ||
| 61 | } | ||
| 62 | if (ttbl & 0x80) { | ||
| 63 | // r |= a & b & c; | ||
| 64 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 65 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 66 | r = ir.BitwiseOr(r, rhs); | ||
| 67 | } | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 75 | BitField<8, 8, IR::Reg> src_reg; | ||
| 76 | BitField<47, 1, u64> cc; | ||
| 77 | } const lop3{insn}; | ||
| 78 | |||
| 79 | if (lop3.cc != 0) { | ||
| 80 | throw NotImplementedException("LOP3 CC"); | ||
| 81 | } | ||
| 82 | |||
| 83 | const IR::U32 op_a{v.X(lop3.src_reg)}; | ||
| 84 | const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; | ||
| 85 | v.X(lop3.dest_reg, result); | ||
| 86 | return result; | ||
| 87 | } | ||
| 88 | |||
| 89 | u64 GetLut48(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<48, 8, u64> lut; | ||
| 93 | } const lut{insn}; | ||
| 94 | return lut.lut; | ||
| 95 | } | ||
| 96 | } // Anonymous namespace | ||
| 97 | |||
| 98 | void TranslatorVisitor::LOP3_reg(u64 insn) { | ||
| 99 | union { | ||
| 100 | u64 insn; | ||
| 101 | BitField<28, 8, u64> lut; | ||
| 102 | BitField<38, 1, u64> x; | ||
| 103 | BitField<36, 2, PredicateOp> pred_op; | ||
| 104 | BitField<48, 3, IR::Pred> pred; | ||
| 105 | } const lop3{insn}; | ||
| 106 | |||
| 107 | if (lop3.x != 0) { | ||
| 108 | throw NotImplementedException("LOP3 X"); | ||
| 109 | } | ||
| 110 | const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; | ||
| 111 | const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; | ||
| 112 | ir.SetPred(lop3.pred, pred_result); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::LOP3_cbuf(u64 insn) { | ||
| 116 | LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); | ||
| 117 | } | ||
| 118 | |||
| 119 | void TranslatorVisitor::LOP3_imm(u64 insn) { | ||
| 120 | LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); | ||
| 121 | } | ||
| 122 | } // namespace Shader::Maxwell | ||
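ApplyLUT above unrolls the standard LOP3.LUT identity: for each bit position, the input triple selects bit (a << 2 | b << 1 | c) of the 8-bit truth table. A scalar equivalent that is handy for sanity-checking the expansion (illustrative code):

    #include <cassert>
    #include <cstdint>

    // Scalar LOP3: look up every (a, b, c) bit triple in the 8-bit truth table
    uint32_t Lop3(uint32_t a, uint32_t b, uint32_t c, uint8_t ttbl) {
        uint32_t r = 0;
        for (int bit = 0; bit < 32; ++bit) {
            const uint32_t index =
                (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) | ((c >> bit) & 1);
            r |= ((ttbl >> index) & 1u) << bit;
        }
        return r;
    }

    int main() {
        // 0xF8 encodes a | (b & c); 0x96 encodes a ^ b ^ c
        assert(Lop3(0xF0F0, 0xCCCC, 0xAAAA, 0xF8) == (0xF0F0 | (0xCCCC & 0xAAAA)));
        assert(Lop3(0xF0F0, 0xCCCC, 0xAAAA, 0x96) == (0xF0F0 ^ 0xCCCC ^ 0xAAAA));
    }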
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | } // Anonymous namespace | ||
| 16 | |||
| 17 | void TranslatorVisitor::P2R_reg(u64) { | ||
| 18 | throw NotImplementedException("P2R (reg)"); | ||
| 19 | } | ||
| 20 | |||
| 21 | void TranslatorVisitor::P2R_cbuf(u64) { | ||
| 22 | throw NotImplementedException("P2R (cbuf)"); | ||
| 23 | } | ||
| 24 | |||
| 25 | void TranslatorVisitor::P2R_imm(u64 insn) { | ||
| 26 | union { | ||
| 27 | u64 raw; | ||
| 28 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 29 | BitField<8, 8, IR::Reg> src; | ||
| 30 | BitField<40, 1, Mode> mode; | ||
| 31 | BitField<41, 2, u64> byte_selector; | ||
| 32 | } const p2r{insn}; | ||
| 33 | |||
| 34 | const u32 mask{GetImm20(insn).U32()}; | ||
| 35 | const bool pr_mode{p2r.mode == Mode::PR}; | ||
| 36 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 37 | const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; | ||
| 38 | IR::U32 insert{ir.Imm32(0)}; | ||
| 39 | for (u32 index = 0; index < num_items; ++index) { | ||
| 40 | if (((mask >> index) & 1) == 0) { | ||
| 41 | continue; | ||
| 42 | } | ||
| 43 | const IR::U1 cond{[this, index, pr_mode] { | ||
| 44 | if (pr_mode) { | ||
| 45 | return ir.GetPred(IR::Pred{index}); | ||
| 46 | } | ||
| 47 | switch (index) { | ||
| 48 | case 0: | ||
| 49 | return ir.GetZFlag(); | ||
| 50 | case 1: | ||
| 51 | return ir.GetSFlag(); | ||
| 52 | case 2: | ||
| 53 | return ir.GetCFlag(); | ||
| 54 | case 3: | ||
| 55 | return ir.GetOFlag(); | ||
| 56 | } | ||
| 57 | throw LogicError("Unreachable P2R index"); | ||
| 58 | }()}; | ||
| 59 | const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; | ||
| 60 | insert = ir.BitwiseOr(insert, bit); | ||
| 61 | } | ||
| 62 | const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; | ||
| 63 | X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
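P2R_imm above packs single-bit items (predicates P0..P6, or the Z/S/C/O flags) into one byte of the destination, leaving bits outside the shifted mask untouched. A scalar model of the masking (hypothetical helper; items[i] stands for the i-th predicate or flag):

    #include <cstdint>

    uint32_t P2r(uint32_t src, uint32_t mask, uint32_t byte_selector, const bool items[],
                 uint32_t num_items) {
        const uint32_t offset = byte_selector * 8;
        uint32_t insert = 0;
        for (uint32_t i = 0; i < num_items; ++i) {
            if (((mask >> i) & 1) != 0 && items[i]) {
                insert |= 1u << (i + offset);
            }
        }
        // Bits outside mask << offset come from the source register unchanged
        return (src & ~(mask << offset)) | insert;
    }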
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<39, 4, u64> mask; | ||
| 18 | BitField<12, 4, u64> mov32i_mask; | ||
| 19 | } const mov{insn}; | ||
| 20 | |||
| 21 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | ||
| 22 | throw NotImplementedException("Non-full move mask"); | ||
| 23 | } | ||
| 24 | v.X(mov.dest_reg, src); | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::MOV_reg(u64 insn) { | ||
| 29 | MOV(*this, insn, GetReg20(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::MOV_cbuf(u64 insn) { | ||
| 33 | MOV(*this, insn, GetCbuf(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::MOV_imm(u64 insn) { | ||
| 37 | MOV(*this, insn, GetImm20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::MOV32I(u64 insn) { | ||
| 41 | MOV(*this, insn, GetImm32(insn), true); | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { | ||
| 17 | switch (index) { | ||
| 18 | case 0: | ||
| 19 | return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); | ||
| 20 | case 1: | ||
| 21 | return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); | ||
| 22 | case 2: | ||
| 23 | return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); | ||
| 24 | case 3: | ||
| 25 | return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); | ||
| 26 | default: | ||
| 27 | throw LogicError("Unreachable R2P index"); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { | ||
| 32 | union { | ||
| 33 | u64 raw; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg; | ||
| 35 | BitField<40, 1, Mode> mode; | ||
| 36 | BitField<41, 2, u64> byte_selector; | ||
| 37 | } const r2p{insn}; | ||
| 38 | const IR::U32 src{v.X(r2p.src_reg)}; | ||
| 39 | const IR::U32 count{v.ir.Imm32(1)}; | ||
| 40 | const bool pr_mode{r2p.mode == Mode::PR}; | ||
| 41 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 42 | const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; | ||
| 43 | for (u32 index = 0; index < num_items; ++index) { | ||
| 44 | const IR::U32 offset{v.ir.Imm32(offset_base + index)}; | ||
| 45 | const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; | ||
| 46 | const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; | ||
| 47 | const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; | ||
| 48 | const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; | ||
| 49 | if (pr_mode) { | ||
| 50 | const IR::Pred pred{index}; | ||
| 51 | v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); | ||
| 52 | } else { | ||
| 53 | SetFlag(v.ir, inv_mask_bit, src_bit, index); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::R2P_reg(u64 insn) { | ||
| 60 | R2P(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::R2P_cbuf(u64 insn) { | ||
| 64 | R2P(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::R2P_imm(u64 insn) { | ||
| 68 | R2P(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
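R2P is the inverse of P2R: each masked bit of the selected source byte is copied out to a predicate (or CC flag), and unmasked destinations keep their previous value. In scalar form (illustrative, PR mode):

    #include <cstdint>

    void R2p(uint32_t src, uint32_t mask, uint32_t byte_selector, bool preds[7]) {
        const uint32_t offset = byte_selector * 8;
        for (uint32_t i = 0; i < 7; ++i) {
            if (((mask >> i) & 1) != 0) {
                // preds[i] <- bit (byte_selector * 8 + i) of the source register
                preds[i] = ((src >> (offset + i)) & 1) != 0;
            }
        }
    }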
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_CLOCK = 1, | ||
| 14 | SR_VIRTCFG = 2, | ||
| 15 | SR_VIRTID = 3, | ||
| 16 | SR_PM0 = 4, | ||
| 17 | SR_PM1 = 5, | ||
| 18 | SR_PM2 = 6, | ||
| 19 | SR_PM3 = 7, | ||
| 20 | SR_PM4 = 8, | ||
| 21 | SR_PM5 = 9, | ||
| 22 | SR_PM6 = 10, | ||
| 23 | SR_PM7 = 11, | ||
| 24 | SR12 = 12, | ||
| 25 | SR13 = 13, | ||
| 26 | SR14 = 14, | ||
| 27 | SR_ORDERING_TICKET = 15, | ||
| 28 | SR_PRIM_TYPE = 16, | ||
| 29 | SR_INVOCATION_ID = 17, | ||
| 30 | SR_Y_DIRECTION = 18, | ||
| 31 | SR_THREAD_KILL = 19, | ||
| 32 | SM_SHADER_TYPE = 20, | ||
| 33 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 34 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 35 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 36 | SR_MACHINE_ID_0 = 24, | ||
| 37 | SR_MACHINE_ID_1 = 25, | ||
| 38 | SR_MACHINE_ID_2 = 26, | ||
| 39 | SR_MACHINE_ID_3 = 27, | ||
| 40 | SR_AFFINITY = 28, | ||
| 41 | SR_INVOCATION_INFO = 29, | ||
| 42 | SR_WSCALEFACTOR_XY = 30, | ||
| 43 | SR_WSCALEFACTOR_Z = 31, | ||
| 44 | SR_TID = 32, | ||
| 45 | SR_TID_X = 33, | ||
| 46 | SR_TID_Y = 34, | ||
| 47 | SR_TID_Z = 35, | ||
| 48 | SR_CTA_PARAM = 36, | ||
| 49 | SR_CTAID_X = 37, | ||
| 50 | SR_CTAID_Y = 38, | ||
| 51 | SR_CTAID_Z = 39, | ||
| 52 | SR_NTID = 40, | ||
| 53 | SR_CirQueueIncrMinusOne = 41, | ||
| 54 | SR_NLATC = 42, | ||
| 55 | SR43 = 43, | ||
| 56 | SR_SM_SPA_VERSION = 44, | ||
| 57 | SR_MULTIPASSSHADERINFO = 45, | ||
| 58 | SR_LWINHI = 46, | ||
| 59 | SR_SWINHI = 47, | ||
| 60 | SR_SWINLO = 48, | ||
| 61 | SR_SWINSZ = 49, | ||
| 62 | SR_SMEMSZ = 50, | ||
| 63 | SR_SMEMBANKS = 51, | ||
| 64 | SR_LWINLO = 52, | ||
| 65 | SR_LWINSZ = 53, | ||
| 66 | SR_LMEMLOSZ = 54, | ||
| 67 | SR_LMEMHIOFF = 55, | ||
| 68 | SR_EQMASK = 56, | ||
| 69 | SR_LTMASK = 57, | ||
| 70 | SR_LEMASK = 58, | ||
| 71 | SR_GTMASK = 59, | ||
| 72 | SR_GEMASK = 60, | ||
| 73 | SR_REGALLOC = 61, | ||
| 74 | SR_BARRIERALLOC = 62, | ||
| 75 | SR63 = 63, | ||
| 76 | SR_GLOBALERRORSTATUS = 64, | ||
| 77 | SR65 = 65, | ||
| 78 | SR_WARPERRORSTATUS = 66, | ||
| 79 | SR_WARPERRORSTATUSCLEAR = 67, | ||
| 80 | SR68 = 68, | ||
| 81 | SR69 = 69, | ||
| 82 | SR70 = 70, | ||
| 83 | SR71 = 71, | ||
| 84 | SR_PM_HI0 = 72, | ||
| 85 | SR_PM_HI1 = 73, | ||
| 86 | SR_PM_HI2 = 74, | ||
| 87 | SR_PM_HI3 = 75, | ||
| 88 | SR_PM_HI4 = 76, | ||
| 89 | SR_PM_HI5 = 77, | ||
| 90 | SR_PM_HI6 = 78, | ||
| 91 | SR_PM_HI7 = 79, | ||
| 92 | SR_CLOCKLO = 80, | ||
| 93 | SR_CLOCKHI = 81, | ||
| 94 | SR_GLOBALTIMERLO = 82, | ||
| 95 | SR_GLOBALTIMERHI = 83, | ||
| 96 | SR84 = 84, | ||
| 97 | SR85 = 85, | ||
| 98 | SR86 = 86, | ||
| 99 | SR87 = 87, | ||
| 100 | SR88 = 88, | ||
| 101 | SR89 = 89, | ||
| 102 | SR90 = 90, | ||
| 103 | SR91 = 91, | ||
| 104 | SR92 = 92, | ||
| 105 | SR93 = 93, | ||
| 106 | SR94 = 94, | ||
| 107 | SR95 = 95, | ||
| 108 | SR_HWTASKID = 96, | ||
| 109 | SR_CIRCULARQUEUEENTRYINDEX = 97, | ||
| 110 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, | ||
| 111 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, | ||
| 112 | }; | ||
| 113 | |||
| 114 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 115 | switch (special_register) { | ||
| 116 | case SpecialRegister::SR_INVOCATION_ID: | ||
| 117 | return ir.InvocationId(); | ||
| 118 | case SpecialRegister::SR_THREAD_KILL: | ||
| 119 | return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; | ||
| 120 | case SpecialRegister::SR_INVOCATION_INFO: | ||
| 121 | LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); | ||
| 122 | return ir.Imm32(0x00ff'0000); | ||
| 123 | case SpecialRegister::SR_TID: { | ||
| 124 | const IR::Value tid{ir.LocalInvocationId()}; | ||
| 125 | return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||
| 126 | IR::U32{ir.CompositeExtract(tid, 1)}, | ||
| 127 | ir.Imm32(16), ir.Imm32(8)), | ||
| 128 | IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||
| 129 | } | ||
| 130 | case SpecialRegister::SR_TID_X: | ||
| 131 | return ir.LocalInvocationIdX(); | ||
| 132 | case SpecialRegister::SR_TID_Y: | ||
| 133 | return ir.LocalInvocationIdY(); | ||
| 134 | case SpecialRegister::SR_TID_Z: | ||
| 135 | return ir.LocalInvocationIdZ(); | ||
| 136 | case SpecialRegister::SR_CTAID_X: | ||
| 137 | return ir.WorkgroupIdX(); | ||
| 138 | case SpecialRegister::SR_CTAID_Y: | ||
| 139 | return ir.WorkgroupIdY(); | ||
| 140 | case SpecialRegister::SR_CTAID_Z: | ||
| 141 | return ir.WorkgroupIdZ(); | ||
| 142 | case SpecialRegister::SR_WSCALEFACTOR_XY: | ||
| 143 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); | ||
| 144 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 145 | case SpecialRegister::SR_WSCALEFACTOR_Z: | ||
| 146 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); | ||
| 147 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 148 | case SpecialRegister::SR_LANEID: | ||
| 149 | return ir.LaneId(); | ||
| 150 | case SpecialRegister::SR_EQMASK: | ||
| 151 | return ir.SubgroupEqMask(); | ||
| 152 | case SpecialRegister::SR_LTMASK: | ||
| 153 | return ir.SubgroupLtMask(); | ||
| 154 | case SpecialRegister::SR_LEMASK: | ||
| 155 | return ir.SubgroupLeMask(); | ||
| 156 | case SpecialRegister::SR_GTMASK: | ||
| 157 | return ir.SubgroupGtMask(); | ||
| 158 | case SpecialRegister::SR_GEMASK: | ||
| 159 | return ir.SubgroupGeMask(); | ||
| 160 | case SpecialRegister::SR_Y_DIRECTION: | ||
| 161 | return ir.BitCast<IR::U32>(ir.YDirection()); | ||
| 162 | case SpecialRegister::SR_AFFINITY: | ||
| 163 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); | ||
| 164 | return ir.Imm32(0); // This is the default value hardware returns. | ||
| 165 | default: | ||
| 166 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | } // Anonymous namespace | ||
| 170 | |||
| 171 | void TranslatorVisitor::S2R(u64 insn) { | ||
| 172 | union { | ||
| 173 | u64 raw; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 176 | } const s2r{insn}; | ||
| 177 | |||
| 178 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace Shader::Maxwell | ||
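The SR_TID case above packs the three local-invocation components into one register with two BitFieldInserts. The resulting layout, as a scalar sketch (assuming each component fits its field):

    #include <cstdint>

    // SR_TID layout: x in bits [15:0], y in bits [23:16], z in bits [31:26]
    uint32_t PackTid(uint32_t x, uint32_t y, uint32_t z) {
        uint32_t r = x;
        r = (r & ~(0xFFu << 16)) | ((y & 0xFFu) << 16); // BitFieldInsert(r, y, 16, 8)
        r = (r & ~(0x3Fu << 26)) | ((z & 0x3Fu) << 26); // BitFieldInsert(r, z, 26, 6)
        return r;
    }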
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -0,0 +1,283 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | ||
| 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); | ||
| 14 | } | ||
| 15 | |||
| 16 | void TranslatorVisitor::ATOM_cas(u64) { | ||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | ||
| 18 | } | ||
| 19 | |||
| 20 | void TranslatorVisitor::ATOMS_cas(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOMS_cas); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::B2R(u64) { | ||
| 25 | ThrowNotImplemented(Opcode::B2R); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::BPT(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::BPT); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::BRA(u64) { | ||
| 33 | ThrowNotImplemented(Opcode::BRA); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::BRK(u64) { | ||
| 37 | ThrowNotImplemented(Opcode::BRK); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::CAL() { | ||
| 41 | // CAL is a no-op | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::CCTL(u64) { | ||
| 45 | ThrowNotImplemented(Opcode::CCTL); | ||
| 46 | } | ||
| 47 | |||
| 48 | void TranslatorVisitor::CCTLL(u64) { | ||
| 49 | ThrowNotImplemented(Opcode::CCTLL); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::CONT(u64) { | ||
| 53 | ThrowNotImplemented(Opcode::CONT); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::CS2R(u64) { | ||
| 57 | ThrowNotImplemented(Opcode::CS2R); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::FCHK_reg(u64) { | ||
| 61 | ThrowNotImplemented(Opcode::FCHK_reg); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::FCHK_cbuf(u64) { | ||
| 65 | ThrowNotImplemented(Opcode::FCHK_cbuf); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::FCHK_imm(u64) { | ||
| 69 | ThrowNotImplemented(Opcode::FCHK_imm); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::GETCRSPTR(u64) { | ||
| 73 | ThrowNotImplemented(Opcode::GETCRSPTR); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::GETLMEMBASE(u64) { | ||
| 77 | ThrowNotImplemented(Opcode::GETLMEMBASE); | ||
| 78 | } | ||
| 79 | |||
| 80 | void TranslatorVisitor::IDE(u64) { | ||
| 81 | ThrowNotImplemented(Opcode::IDE); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::IDP_reg(u64) { | ||
| 85 | ThrowNotImplemented(Opcode::IDP_reg); | ||
| 86 | } | ||
| 87 | |||
| 88 | void TranslatorVisitor::IDP_imm(u64) { | ||
| 89 | ThrowNotImplemented(Opcode::IDP_imm); | ||
| 90 | } | ||
| 91 | |||
| 92 | void TranslatorVisitor::IMAD_reg(u64) { | ||
| 93 | ThrowNotImplemented(Opcode::IMAD_reg); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::IMAD_rc(u64) { | ||
| 97 | ThrowNotImplemented(Opcode::IMAD_rc); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::IMAD_cr(u64) { | ||
| 101 | ThrowNotImplemented(Opcode::IMAD_cr); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::IMAD_imm(u64) { | ||
| 105 | ThrowNotImplemented(Opcode::IMAD_imm); | ||
| 106 | } | ||
| 107 | |||
| 108 | void TranslatorVisitor::IMAD32I(u64) { | ||
| 109 | ThrowNotImplemented(Opcode::IMAD32I); | ||
| 110 | } | ||
| 111 | |||
| 112 | void TranslatorVisitor::IMADSP_reg(u64) { | ||
| 113 | ThrowNotImplemented(Opcode::IMADSP_reg); | ||
| 114 | } | ||
| 115 | |||
| 116 | void TranslatorVisitor::IMADSP_rc(u64) { | ||
| 117 | ThrowNotImplemented(Opcode::IMADSP_rc); | ||
| 118 | } | ||
| 119 | |||
| 120 | void TranslatorVisitor::IMADSP_cr(u64) { | ||
| 121 | ThrowNotImplemented(Opcode::IMADSP_cr); | ||
| 122 | } | ||
| 123 | |||
| 124 | void TranslatorVisitor::IMADSP_imm(u64) { | ||
| 125 | ThrowNotImplemented(Opcode::IMADSP_imm); | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::IMUL_reg(u64) { | ||
| 129 | ThrowNotImplemented(Opcode::IMUL_reg); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::IMUL_cbuf(u64) { | ||
| 133 | ThrowNotImplemented(Opcode::IMUL_cbuf); | ||
| 134 | } | ||
| 135 | |||
| 136 | void TranslatorVisitor::IMUL_imm(u64) { | ||
| 137 | ThrowNotImplemented(Opcode::IMUL_imm); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IMUL32I(u64) { | ||
| 141 | ThrowNotImplemented(Opcode::IMUL32I); | ||
| 142 | } | ||
| 143 | |||
| 144 | void TranslatorVisitor::JCAL(u64) { | ||
| 145 | ThrowNotImplemented(Opcode::JCAL); | ||
| 146 | } | ||
| 147 | |||
| 148 | void TranslatorVisitor::JMP(u64) { | ||
| 149 | ThrowNotImplemented(Opcode::JMP); | ||
| 150 | } | ||
| 151 | |||
| 152 | void TranslatorVisitor::KIL() { | ||
| 153 | // KIL is a no-op | ||
| 154 | } | ||
| 155 | |||
| 156 | void TranslatorVisitor::LD(u64) { | ||
| 157 | ThrowNotImplemented(Opcode::LD); | ||
| 158 | } | ||
| 159 | |||
| 160 | void TranslatorVisitor::LEPC(u64) { | ||
| 161 | ThrowNotImplemented(Opcode::LEPC); | ||
| 162 | } | ||
| 163 | |||
| 164 | void TranslatorVisitor::LONGJMP(u64) { | ||
| 165 | ThrowNotImplemented(Opcode::LONGJMP); | ||
| 166 | } | ||
| 167 | |||
| 168 | void TranslatorVisitor::NOP(u64) { | ||
| 169 | // NOP is a no-op | ||
| 170 | } | ||
| 171 | |||
| 172 | void TranslatorVisitor::PBK() { | ||
| 173 | // PBK is a no-op | ||
| 174 | } | ||
| 175 | |||
| 176 | void TranslatorVisitor::PCNT() { | ||
| 177 | // PCNT is a no-op | ||
| 178 | } | ||
| 179 | |||
| 180 | void TranslatorVisitor::PEXIT(u64) { | ||
| 181 | ThrowNotImplemented(Opcode::PEXIT); | ||
| 182 | } | ||
| 183 | |||
| 184 | void TranslatorVisitor::PLONGJMP(u64) { | ||
| 185 | ThrowNotImplemented(Opcode::PLONGJMP); | ||
| 186 | } | ||
| 187 | |||
| 188 | void TranslatorVisitor::PRET(u64) { | ||
| 189 | ThrowNotImplemented(Opcode::PRET); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::PRMT_reg(u64) { | ||
| 193 | ThrowNotImplemented(Opcode::PRMT_reg); | ||
| 194 | } | ||
| 195 | |||
| 196 | void TranslatorVisitor::PRMT_rc(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::PRMT_rc); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::PRMT_cr(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::PRMT_cr); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::PRMT_imm(u64) { | ||
| 205 | ThrowNotImplemented(Opcode::PRMT_imm); | ||
| 206 | } | ||
| 207 | |||
| 208 | void TranslatorVisitor::R2B(u64) { | ||
| 209 | ThrowNotImplemented(Opcode::R2B); | ||
| 210 | } | ||
| 211 | |||
| 212 | void TranslatorVisitor::RAM(u64) { | ||
| 213 | ThrowNotImplemented(Opcode::RAM); | ||
| 214 | } | ||
| 215 | |||
| 216 | void TranslatorVisitor::RET(u64) { | ||
| 217 | ThrowNotImplemented(Opcode::RET); | ||
| 218 | } | ||
| 219 | |||
| 220 | void TranslatorVisitor::RTT(u64) { | ||
| 221 | ThrowNotImplemented(Opcode::RTT); | ||
| 222 | } | ||
| 223 | |||
| 224 | void TranslatorVisitor::SAM(u64) { | ||
| 225 | ThrowNotImplemented(Opcode::SAM); | ||
| 226 | } | ||
| 227 | |||
| 228 | void TranslatorVisitor::SETCRSPTR(u64) { | ||
| 229 | ThrowNotImplemented(Opcode::SETCRSPTR); | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SETLMEMBASE(u64) { | ||
| 233 | ThrowNotImplemented(Opcode::SETLMEMBASE); | ||
| 234 | } | ||
| 235 | |||
| 236 | void TranslatorVisitor::SSY() { | ||
| 237 | // SSY is a no-op | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::ST(u64) { | ||
| 241 | ThrowNotImplemented(Opcode::ST); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::STP(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::STP); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SUATOM_cas(u64) { | ||
| 249 | ThrowNotImplemented(Opcode::SUATOM_cas); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SYNC(u64) { | ||
| 253 | ThrowNotImplemented(Opcode::SYNC); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::TXA(u64) { | ||
| 257 | ThrowNotImplemented(Opcode::TXA); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::VABSDIFF(u64) { | ||
| 261 | ThrowNotImplemented(Opcode::VABSDIFF); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::VABSDIFF4(u64) { | ||
| 265 | ThrowNotImplemented(Opcode::VABSDIFF4); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::VADD(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::VADD); | ||
| 270 | } | ||
| 271 | |||
| 272 | void TranslatorVisitor::VSET(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::VSET); | ||
| 274 | } | ||
| 275 | void TranslatorVisitor::VSHL(u64) { | ||
| 276 | ThrowNotImplemented(Opcode::VSHL); | ||
| 277 | } | ||
| 278 | |||
| 279 | void TranslatorVisitor::VSHR(u64) { | ||
| 280 | ThrowNotImplemented(Opcode::VSHR); | ||
| 281 | } | ||
| 282 | |||
| 283 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> output_reg; // Not needed on host | ||
| 16 | BitField<39, 1, u64> emit; | ||
| 17 | BitField<40, 1, u64> cut; | ||
| 18 | } const out{insn}; | ||
| 19 | |||
| 20 | stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); | ||
| 21 | |||
| 22 | if (out.emit != 0) { | ||
| 23 | v.ir.EmitVertex(stream_index); | ||
| 24 | } | ||
| 25 | if (out.cut != 0) { | ||
| 26 | v.ir.EndPrimitive(stream_index); | ||
| 27 | } | ||
| 28 | // Host doesn't need the output register, but we can write to it to avoid undefined reads | ||
| 29 | v.X(out.dest_reg, v.ir.Imm32(0)); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::OUT_reg(u64 insn) { | ||
| 34 | OUT(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::OUT_cbuf(u64 insn) { | ||
| 38 | OUT(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::OUT_imm(u64 insn) { | ||
| 42 | OUT(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | CovMask, | ||
| 14 | Covered, | ||
| 15 | Offset, | ||
| 16 | CentroidOffset, | ||
| 17 | MyIndex, | ||
| 18 | }; | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | void TranslatorVisitor::PIXLD(u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<31, 3, Mode> mode; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 26 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 27 | BitField<20, 8, s64> addr_offset; | ||
| 28 | BitField<45, 3, IR::Pred> dest_pred; | ||
| 29 | } const pixld{insn}; | ||
| 30 | |||
| 31 | if (pixld.dest_pred != IR::Pred::PT) { | ||
| 32 | throw NotImplementedException("Destination predicate"); | ||
| 33 | } | ||
| 34 | if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { | ||
| 35 | throw NotImplementedException("Non-zero source register"); | ||
| 36 | } | ||
| 37 | switch (pixld.mode) { | ||
| 38 | case Mode::MyIndex: | ||
| 39 | X(pixld.dest_reg, ir.SampleId()); | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | throw NotImplementedException("Mode {}", pixld.mode.Value()); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSETP(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 15 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 16 | BitField<12, 3, IR::Pred> pred_a; | ||
| 17 | BitField<15, 1, u64> neg_pred_a; | ||
| 18 | BitField<24, 2, BooleanOp> bop_1; | ||
| 19 | BitField<29, 3, IR::Pred> pred_b; | ||
| 20 | BitField<32, 1, u64> neg_pred_b; | ||
| 21 | BitField<39, 3, IR::Pred> pred_c; | ||
| 22 | BitField<42, 1, u64> neg_pred_c; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | } const pset{insn}; | ||
| 25 | |||
| 26 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 27 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 28 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 29 | |||
| 30 | const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 31 | const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; | ||
| 33 | const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; | ||
| 34 | |||
| 35 | ir.SetPred(pset.dest_pred_a, result_a); | ||
| 36 | ir.SetPred(pset.dest_pred_b, result_b); | ||
| 37 | } | ||
| 38 | } // namespace Shader::Maxwell | ||
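PSETP above writes two predicates from one combine chain; the second destination receives the same expression with pred_a negated. A boolean sketch (bop_1/bop_2 stand for the AND/OR/XOR combiners):

    #include <functional>

    void Psetp(bool pred_a, bool pred_b, bool pred_c,
               const std::function<bool(bool, bool)>& bop_1,
               const std::function<bool(bool, bool)>& bop_2, bool& dest_a, bool& dest_b) {
        dest_a = bop_2(bop_1(pred_a, pred_b), pred_c);
        dest_b = bop_2(bop_1(!pred_a, pred_b), pred_c);
    }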
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSET(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<12, 3, IR::Pred> pred_a; | ||
| 16 | BitField<15, 1, u64> neg_pred_a; | ||
| 17 | BitField<24, 2, BooleanOp> bop_1; | ||
| 18 | BitField<29, 3, IR::Pred> pred_b; | ||
| 19 | BitField<32, 1, u64> neg_pred_b; | ||
| 20 | BitField<39, 3, IR::Pred> pred_c; | ||
| 21 | BitField<42, 1, u64> neg_pred_c; | ||
| 22 | BitField<44, 1, u64> bf; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | BitField<47, 1, u64> cc; | ||
| 25 | } const pset{insn}; | ||
| 26 | |||
| 27 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 28 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 29 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 30 | |||
| 31 | const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; | ||
| 33 | |||
| 34 | const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; // 1.0f : -1 | ||
| 35 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 36 | |||
| 37 | const IR::U32 result{ir.Select(res_2, true_result, zero)}; | ||
| 38 | |||
| 39 | X(pset.dest_reg, result); | ||
| 40 | if (pset.cc != 0) { | ||
| 41 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 42 | SetZFlag(is_zero); | ||
| 43 | if (pset.bf != 0) { | ||
| 44 | ResetSFlag(); | ||
| 45 | } else { | ||
| 46 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 47 | } | ||
| 48 | ResetOFlag(); | ||
| 49 | ResetCFlag(); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
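PSET materializes the combined predicate as a register value: with BF set, "true" is the bit pattern of 1.0f, otherwise it is integer -1; "false" is always zero. As a sketch (hypothetical helper):

    #include <cstdint>

    uint32_t PsetResult(bool predicate, bool bf) {
        // BF picks float 1.0f (0x3F800000) as the true value, otherwise all ones
        const uint32_t true_value = bf ? 0x3F800000u : 0xFFFFFFFFu;
        return predicate ? true_value : 0u;
    }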
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | |||
| 12 | void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | } const sel{insn}; | ||
| 20 | |||
| 21 | const IR::U1 pred{v.ir.GetPred(sel.pred)}; | ||
| 22 | IR::U32 op_a{v.X(sel.src_reg)}; | ||
| 23 | IR::U32 op_b{src}; | ||
| 24 | if (sel.neg_pred != 0) { | ||
| 25 | std::swap(op_a, op_b); | ||
| 26 | } | ||
| 27 | const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; | ||
| 28 | |||
| 29 | v.X(sel.dest_reg, result); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::SEL_reg(u64 insn) { | ||
| 34 | SEL(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::SEL_cbuf(u64 insn) { | ||
| 38 | SEL(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::SEL_imm(u64 insn) { | ||
| 42 | SEL(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | } // namespace Shader::Maxwell | ||
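SEL above handles a negated predicate by swapping the operands rather than inverting the condition; the two forms are equivalent. In scalar terms (illustrative):

    #include <cstdint>
    #include <utility>

    uint32_t Sel(bool pred, bool neg_pred, uint32_t op_a, uint32_t op_b) {
        if (neg_pred) {
            std::swap(op_a, op_b); // select(!p, a, b) == select(p, b, a)
        }
        return pred ? op_a : op_b;
    }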
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class Size : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | S64, | ||
| 29 | F32FTZRN, | ||
| 30 | F16x2FTZRN, | ||
| 31 | SD32, | ||
| 32 | SD64, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class AtomicOp : u64 { | ||
| 36 | ADD, | ||
| 37 | MIN, | ||
| 38 | MAX, | ||
| 39 | INC, | ||
| 40 | DEC, | ||
| 41 | AND, | ||
| 42 | OR, | ||
| 43 | XOR, | ||
| 44 | EXCH, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Clamp : u64 { | ||
| 48 | IGN, | ||
| 49 | Default, | ||
| 50 | TRAP, | ||
| 51 | }; | ||
| 52 | |||
| 53 | TextureType GetType(Type type) { | ||
| 54 | switch (type) { | ||
| 55 | case Type::_1D: | ||
| 56 | return TextureType::Color1D; | ||
| 57 | case Type::BUFFER_1D: | ||
| 58 | return TextureType::Buffer; | ||
| 59 | case Type::ARRAY_1D: | ||
| 60 | return TextureType::ColorArray1D; | ||
| 61 | case Type::_2D: | ||
| 62 | return TextureType::Color2D; | ||
| 63 | case Type::ARRAY_2D: | ||
| 64 | return TextureType::ColorArray2D; | ||
| 65 | case Type::_3D: | ||
| 66 | return TextureType::Color3D; | ||
| 67 | } | ||
| 68 | throw NotImplementedException("Invalid type {}", type); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 72 | switch (type) { | ||
| 73 | case Type::_1D: | ||
| 74 | case Type::BUFFER_1D: | ||
| 75 | return v.X(reg); | ||
| 76 | case Type::_2D: | ||
| 77 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 78 | case Type::_3D: | ||
| 79 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 80 | default: | ||
| 81 | break; | ||
| 82 | } | ||
| 83 | throw NotImplementedException("Invalid type {}", type); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, | ||
| 87 | const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, | ||
| 88 | bool is_signed) { | ||
| 89 | switch (op) { | ||
| 90 | case AtomicOp::ADD: | ||
| 91 | return ir.ImageAtomicIAdd(handle, coords, op_b, info); | ||
| 92 | case AtomicOp::MIN: | ||
| 93 | return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); | ||
| 94 | case AtomicOp::MAX: | ||
| 95 | return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); | ||
| 96 | case AtomicOp::INC: | ||
| 97 | return ir.ImageAtomicInc(handle, coords, op_b, info); | ||
| 98 | case AtomicOp::DEC: | ||
| 99 | return ir.ImageAtomicDec(handle, coords, op_b, info); | ||
| 100 | case AtomicOp::AND: | ||
| 101 | return ir.ImageAtomicAnd(handle, coords, op_b, info); | ||
| 102 | case AtomicOp::OR: | ||
| 103 | return ir.ImageAtomicOr(handle, coords, op_b, info); | ||
| 104 | case AtomicOp::XOR: | ||
| 105 | return ir.ImageAtomicXor(handle, coords, op_b, info); | ||
| 106 | case AtomicOp::EXCH: | ||
| 107 | return ir.ImageAtomicExchange(handle, coords, op_b, info); | ||
| 108 | default: | ||
| 109 | throw NotImplementedException("Atomic Operation {}", op); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | ImageFormat Format(Size size) { | ||
| 114 | switch (size) { | ||
| 115 | case Size::U32: | ||
| 116 | case Size::S32: | ||
| 117 | case Size::SD32: | ||
| 118 | return ImageFormat::R32_UINT; | ||
| 119 | default: | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | throw NotImplementedException("Invalid size {}", size); | ||
| 123 | } | ||
| 124 | |||
| 125 | bool IsSizeInt32(Size size) { | ||
| 126 | switch (size) { | ||
| 127 | case Size::U32: | ||
| 128 | case Size::S32: | ||
| 129 | case Size::SD32: | ||
| 130 | return true; | ||
| 131 | default: | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, | ||
| 137 | IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, | ||
| 138 | u64 bound_offset, bool is_bindless, bool write_result) { | ||
| 139 | if (clamp != Clamp::IGN) { | ||
| 140 | throw NotImplementedException("Clamp {}", clamp); | ||
| 141 | } | ||
| 142 | if (!IsSizeInt32(size)) { | ||
| 143 | throw NotImplementedException("Size {}", size); | ||
| 144 | } | ||
| 145 | const bool is_signed{size == Size::S32}; | ||
| 146 | const ImageFormat format{Format(size)}; | ||
| 147 | const TextureType tex_type{GetType(type)}; | ||
| 148 | const IR::Value coords{MakeCoords(v, coord_reg, type)}; | ||
| 149 | |||
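| | // Bound surfaces take their handle from the constant buffer at bound_offset (scaled by 4); bindless surfaces read the handle from a register. | ||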
| 150 | const IR::U32 handle{is_bindless ? v.X(bindless_reg) | ||
| 151 | : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; | ||
| 152 | IR::TextureInstInfo info{}; | ||
| 153 | info.type.Assign(tex_type); | ||
| 154 | info.image_format.Assign(format); | ||
| 155 | |||
| 156 | // TODO: float/64-bit operand | ||
| 157 | const IR::Value op_b{v.X(operand_reg)}; | ||
| 158 | const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; | ||
| 159 | |||
| 160 | if (write_result) { | ||
| 161 | v.X(dest_reg, IR::U32{color}); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } // Anonymous namespace | ||
| 165 | |||
| 166 | void TranslatorVisitor::SUATOM(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<54, 1, u64> is_bindless; | ||
| 170 | BitField<29, 4, AtomicOp> op; | ||
| 171 | BitField<33, 3, Type> type; | ||
| 172 | BitField<51, 3, Size> size; | ||
| 173 | BitField<49, 2, Clamp> clamp; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 176 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 177 | BitField<36, 13, u64> bound_offset; // !is_bindless | ||
| 178 | BitField<39, 8, IR::Reg> bindless_reg; // is_bindless | ||
| 179 | } const suatom{insn}; | ||
| 180 | |||
| 181 | ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, | ||
| 182 | suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, | ||
| 183 | suatom.is_bindless != 0, true); | ||
| 184 | } | ||
| 185 | |||
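| | // SURED is the reduction form of SUATOM: the same atomic operation is applied, but the previous value is discarded (dest_reg = RZ, write_result = false). | ||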
| 186 | void TranslatorVisitor::SURED(u64 insn) { | ||
| 187 | // TODO: confirm offsets | ||
| 188 | union { | ||
| 189 | u64 raw; | ||
| 190 | BitField<51, 1, u64> is_bound; | ||
| 191 | BitField<21, 3, AtomicOp> op; | ||
| 192 | BitField<33, 3, Type> type; | ||
| 193 | BitField<20, 3, Size> size; | ||
| 194 | BitField<49, 2, Clamp> clamp; | ||
| 195 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 197 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 198 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 199 | } const sured{insn}; | ||
| 200 | ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, | ||
| 201 | sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, | ||
| 202 | sured.is_bound == 0, false); | ||
| 203 | } | ||
| 204 | |||
| 205 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp | |||
| @@ -0,0 +1,281 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | constexpr unsigned R = 1 << 0; | ||
| 25 | constexpr unsigned G = 1 << 1; | ||
| 26 | constexpr unsigned B = 1 << 2; | ||
| 27 | constexpr unsigned A = 1 << 3; | ||
| 28 | |||
| 29 | constexpr std::array MASK{ | ||
| 30 | 0U, // | ||
| 31 | R, // | ||
| 32 | G, // | ||
| 33 | R | G, // | ||
| 34 | B, // | ||
| 35 | R | B, // | ||
| 36 | G | B, // | ||
| 37 | R | G | B, // | ||
| 38 | A, // | ||
| 39 | R | A, // | ||
| 40 | G | A, // | ||
| 41 | R | G | A, // | ||
| 42 | B | A, // | ||
| 43 | R | B | A, // | ||
| 44 | G | B | A, // | ||
| 45 | R | G | B | A, // | ||
| 46 | }; | ||
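| | // The 4-bit .P swizzle field is already an RGBA bitmask (bit 0 = R ... bit 3 = A), so MASK is the identity mapping over that encoding. | ||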
| 47 | |||
| 48 | enum class Size : u64 { | ||
| 49 | U8, | ||
| 50 | S8, | ||
| 51 | U16, | ||
| 52 | S16, | ||
| 53 | B32, | ||
| 54 | B64, | ||
| 55 | B128, | ||
| 56 | }; | ||
| 57 | |||
| 58 | enum class Clamp : u64 { | ||
| 59 | IGN, | ||
| 60 | Default, | ||
| 61 | TRAP, | ||
| 62 | }; | ||
| 63 | |||
| 64 | // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators | ||
| 65 | enum class LoadCache : u64 { | ||
| 66 | CA, // Cache at all levels, likely to be accessed again | ||
| 67 | CG, // Cache at global level (L2 and below, not L1) | ||
| 68 | CI, // ??? | ||
| 69 | CV, // Don't cache and fetch again (volatile) | ||
| 70 | }; | ||
| 71 | |||
| 72 | enum class StoreCache : u64 { | ||
| 73 | WB, // Cache write-back all coherent levels | ||
| 74 | CG, // Cache at global level (L2 and below, not L1) | ||
| 75 | CS, // Cache streaming, likely to be accessed once | ||
| 76 | WT, // Cache write-through (to system memory, volatile?) | ||
| 77 | }; | ||
| 78 | |||
| 79 | ImageFormat Format(Size size) { | ||
| 80 | switch (size) { | ||
| 81 | case Size::U8: | ||
| 82 | return ImageFormat::R8_UINT; | ||
| 83 | case Size::S8: | ||
| 84 | return ImageFormat::R8_SINT; | ||
| 85 | case Size::U16: | ||
| 86 | return ImageFormat::R16_UINT; | ||
| 87 | case Size::S16: | ||
| 88 | return ImageFormat::R16_SINT; | ||
| 89 | case Size::B32: | ||
| 90 | return ImageFormat::R32_UINT; | ||
| 91 | case Size::B64: | ||
| 92 | return ImageFormat::R32G32_UINT; | ||
| 93 | case Size::B128: | ||
| 94 | return ImageFormat::R32G32B32A32_UINT; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid size {}", size); | ||
| 97 | } | ||
| 98 | |||
| 99 | int SizeInRegs(Size size) { | ||
| 100 | switch (size) { | ||
| 101 | case Size::U8: | ||
| 102 | case Size::S8: | ||
| 103 | case Size::U16: | ||
| 104 | case Size::S16: | ||
| 105 | case Size::B32: | ||
| 106 | return 1; | ||
| 107 | case Size::B64: | ||
| 108 | return 2; | ||
| 109 | case Size::B128: | ||
| 110 | return 4; | ||
| 111 | } | ||
| 112 | throw NotImplementedException("Invalid size {}", size); | ||
| 113 | } | ||
| 114 | |||
| 115 | TextureType GetType(Type type) { | ||
| 116 | switch (type) { | ||
| 117 | case Type::_1D: | ||
| 118 | return TextureType::Color1D; | ||
| 119 | case Type::BUFFER_1D: | ||
| 120 | return TextureType::Buffer; | ||
| 121 | case Type::ARRAY_1D: | ||
| 122 | return TextureType::ColorArray1D; | ||
| 123 | case Type::_2D: | ||
| 124 | return TextureType::Color2D; | ||
| 125 | case Type::ARRAY_2D: | ||
| 126 | return TextureType::ColorArray2D; | ||
| 127 | case Type::_3D: | ||
| 128 | return TextureType::Color3D; | ||
| 129 | } | ||
| 130 | throw NotImplementedException("Invalid type {}", type); | ||
| 131 | } | ||
| 132 | |||
| 133 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 134 | const auto array{[&](int index) { | ||
| 135 | return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); | ||
| 136 | }}; | ||
| 137 | switch (type) { | ||
| 138 | case Type::_1D: | ||
| 139 | case Type::BUFFER_1D: | ||
| 140 | return v.X(reg); | ||
| 141 | case Type::ARRAY_1D: | ||
| 142 | return v.ir.CompositeConstruct(v.X(reg), array(1)); | ||
| 143 | case Type::_2D: | ||
| 144 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 145 | case Type::ARRAY_2D: | ||
| 146 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); | ||
| 147 | case Type::_3D: | ||
| 148 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 149 | } | ||
| 150 | throw NotImplementedException("Invalid type {}", type); | ||
| 151 | } | ||
| 152 | |||
| 153 | unsigned SwizzleMask(u64 swizzle) { | ||
| 154 | if (swizzle == 0 || swizzle >= MASK.size()) { | ||
| 155 | throw NotImplementedException("Invalid swizzle {}", swizzle); | ||
| 156 | } | ||
| 157 | return MASK[swizzle]; | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { | ||
| 161 | std::array<IR::U32, 4> colors; | ||
| 162 | for (int i = 0; i < num_regs; ++i) { | ||
| 163 | colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); | ||
| 164 | } | ||
| 165 | for (int i = num_regs; i < 4; ++i) { | ||
| 166 | colors[static_cast<size_t>(i)] = ir.Imm32(0); | ||
| 167 | } | ||
| 168 | return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); | ||
| 169 | } | ||
| 170 | } // Anonymous namespace | ||
| 171 | |||
| 172 | void TranslatorVisitor::SULD(u64 insn) { | ||
| 173 | union { | ||
| 174 | u64 raw; | ||
| 175 | BitField<51, 1, u64> is_bound; | ||
| 176 | BitField<52, 1, u64> d; | ||
| 177 | BitField<23, 1, u64> ba; | ||
| 178 | BitField<33, 3, Type> type; | ||
| 179 | BitField<24, 2, LoadCache> cache; | ||
| 180 | BitField<20, 3, Size> size; // .D | ||
| 181 | BitField<20, 4, u64> swizzle; // .P | ||
| 182 | BitField<49, 2, Clamp> clamp; | ||
| 183 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 184 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 185 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 186 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 187 | } const suld{insn}; | ||
| 188 | |||
| 189 | if (suld.clamp != Clamp::IGN) { | ||
| 190 | throw NotImplementedException("Clamp {}", suld.clamp.Value()); | ||
| 191 | } | ||
| 192 | if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { | ||
| 193 | throw NotImplementedException("Cache {}", suld.cache.Value()); | ||
| 194 | } | ||
| 195 | const bool is_typed{suld.d != 0}; | ||
| 196 | if (is_typed && suld.ba != 0) { | ||
| 197 | throw NotImplementedException("BA"); | ||
| 198 | } | ||
| 199 | |||
| 200 | const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; | ||
| 201 | const TextureType type{GetType(suld.type)}; | ||
| 202 | const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; | ||
| 203 | const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) | ||
| 204 | : X(suld.bindless_reg)}; | ||
| 205 | IR::TextureInstInfo info{}; | ||
| 206 | info.type.Assign(type); | ||
| 207 | info.image_format.Assign(format); | ||
| 208 | |||
| 209 | const IR::Value result{ir.ImageRead(handle, coords, info)}; | ||
| 210 | IR::Reg dest_reg{suld.dest_reg}; | ||
| 211 | if (is_typed) { | ||
| 212 | const int num_regs{SizeInRegs(suld.size)}; | ||
| 213 | for (int i = 0; i < num_regs; ++i) { | ||
| 214 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | const unsigned mask{SwizzleMask(suld.swizzle)}; | ||
| 218 | const int bits{std::popcount(mask)}; | ||
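| | // Untyped destinations must be aligned to the number of written registers; three-component results are aligned as if they were four. | ||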
| 219 | if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { | ||
| 220 | throw NotImplementedException("Unaligned destination register"); | ||
| 221 | } | ||
| 222 | for (unsigned component = 0; component < 4; ++component) { | ||
| 223 | if (((mask >> component) & 1) == 0) { | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); | ||
| 227 | ++dest_reg; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SUST(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<51, 1, u64> is_bound; | ||
| 236 | BitField<52, 1, u64> d; | ||
| 237 | BitField<23, 1, u64> ba; | ||
| 238 | BitField<33, 3, Type> type; | ||
| 239 | BitField<24, 2, StoreCache> cache; | ||
| 240 | BitField<20, 3, Size> size; // .D | ||
| 241 | BitField<20, 4, u64> swizzle; // .P | ||
| 242 | BitField<49, 2, Clamp> clamp; | ||
| 243 | BitField<0, 8, IR::Reg> data_reg; | ||
| 244 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 245 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 246 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 247 | } const sust{insn}; | ||
| 248 | |||
| 249 | if (sust.clamp != Clamp::IGN) { | ||
| 250 | throw NotImplementedException("Clamp {}", sust.clamp.Value()); | ||
| 251 | } | ||
| 252 | if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { | ||
| 253 | throw NotImplementedException("Cache {}", sust.cache.Value()); | ||
| 254 | } | ||
| 255 | const bool is_typed{sust.d != 0}; | ||
| 256 | if (is_typed && sust.ba != 0) { | ||
| 257 | throw NotImplementedException("BA"); | ||
| 258 | } | ||
| 259 | const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; | ||
| 260 | const TextureType type{GetType(sust.type)}; | ||
| 261 | const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; | ||
| 262 | const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) | ||
| 263 | : X(sust.bindless_reg)}; | ||
| 264 | IR::TextureInstInfo info{}; | ||
| 265 | info.type.Assign(type); | ||
| 266 | info.image_format.Assign(format); | ||
| 267 | |||
| 268 | IR::Value color; | ||
| 269 | if (is_typed) { | ||
| 270 | color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); | ||
| 271 | } else { | ||
| 272 | const unsigned mask{SwizzleMask(sust.swizzle)}; | ||
| 273 | if (mask != 0xf) { | ||
| 274 | throw NotImplementedException("Non-full mask"); | ||
| 275 | } | ||
| 276 | color = MakeColor(ir, sust.data_reg, 4); | ||
| 277 | } | ||
| 278 | ir.ImageWrite(handle, coords, color, info); | ||
| 279 | } | ||
| 280 | |||
| 281 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Blod : u64 { | ||
| 15 | None, | ||
| 16 | LZ, | ||
| 17 | LB, | ||
| 18 | LL, | ||
| 19 | INVALIDBLOD4, | ||
| 20 | INVALIDBLOD5, | ||
| 21 | LBA, | ||
| 22 | LLA, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class TextureType : u64 { | ||
| 26 | _1D, | ||
| 27 | ARRAY_1D, | ||
| 28 | _2D, | ||
| 29 | ARRAY_2D, | ||
| 30 | _3D, | ||
| 31 | ARRAY_3D, | ||
| 32 | CUBE, | ||
| 33 | ARRAY_CUBE, | ||
| 34 | }; | ||
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
| 99 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 100 | const IR::U32 value{v.X(reg++)}; | ||
| 101 | switch (type) { | ||
| 102 | case TextureType::_1D: | ||
| 103 | case TextureType::ARRAY_1D: | ||
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 105 | case TextureType::_2D: | ||
| 106 | case TextureType::ARRAY_2D: | ||
| 107 | return v.ir.CompositeConstruct( | ||
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 109 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 110 | case TextureType::_3D: | ||
| 111 | case TextureType::ARRAY_3D: | ||
| 112 | return v.ir.CompositeConstruct( | ||
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 116 | case TextureType::CUBE: | ||
| 117 | case TextureType::ARRAY_CUBE: | ||
| 118 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 119 | } | ||
| 120 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool HasExplicitLod(Blod blod) { | ||
| 124 | switch (blod) { | ||
| 125 | case Blod::LL: | ||
| 126 | case Blod::LLA: | ||
| 127 | case Blod::LZ: | ||
| 128 | return true; | ||
| 129 | default: | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||
| 135 | std::optional<u32> cbuf_offset) { | ||
| 136 | union { | ||
| 137 | u64 raw; | ||
| 138 | BitField<35, 1, u64> ndv; | ||
| 139 | BitField<49, 1, u64> nodep; | ||
| 140 | BitField<50, 1, u64> dc; | ||
| 141 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 142 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 143 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 144 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 145 | BitField<28, 3, TextureType> type; | ||
| 146 | BitField<31, 4, u64> mask; | ||
| 147 | } const tex{insn}; | ||
| 148 | |||
| 149 | if (lc) { | ||
| 150 | throw NotImplementedException("LC"); | ||
| 151 | } | ||
| 152 | const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||
| 153 | |||
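| | // Operands beyond the coordinates are consumed in order from meta_reg: bindless handle (if any), then LOD/bias, then AOFFI offset, then the depth-compare reference. | ||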
| 154 | IR::Reg meta_reg{tex.meta_reg}; | ||
| 155 | IR::Value handle; | ||
| 156 | IR::Value offset; | ||
| 157 | IR::F32 dref; | ||
| 158 | IR::F32 lod_clamp; | ||
| 159 | if (cbuf_offset) { | ||
| 160 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 161 | } else { | ||
| 162 | handle = v.X(meta_reg++); | ||
| 163 | } | ||
| 164 | const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||
| 165 | if (aoffi) { | ||
| 166 | offset = MakeOffset(v, meta_reg, tex.type); | ||
| 167 | } | ||
| 168 | if (tex.dc != 0) { | ||
| 169 | dref = v.F(meta_reg++); | ||
| 170 | } | ||
| 171 | IR::TextureInstInfo info{}; | ||
| 172 | info.type.Assign(GetType(tex.type)); | ||
| 173 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | ||
| 174 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||
| 175 | info.has_lod_clamp.Assign(lc ? 1 : 0); | ||
| 176 | |||
| 177 | const IR::Value sample{[&]() -> IR::Value { | ||
| 178 | if (tex.dc == 0) { | ||
| 179 | if (HasExplicitLod(blod)) { | ||
| 180 | return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); | ||
| 181 | } else { | ||
| 182 | return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 183 | } | ||
| 184 | } | ||
| 185 | if (HasExplicitLod(blod)) { | ||
| 186 | return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); | ||
| 187 | } else { | ||
| 188 | return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 189 | info); | ||
| 190 | } | ||
| 191 | }()}; | ||
| 192 | |||
| 193 | IR::Reg dest_reg{tex.dest_reg}; | ||
| 194 | for (int element = 0; element < 4; ++element) { | ||
| 195 | if (((tex.mask >> element) & 1) == 0) { | ||
| 196 | continue; | ||
| 197 | } | ||
| 198 | IR::F32 value; | ||
| 199 | if (tex.dc != 0) { | ||
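| | // Depth-compare samples are scalar: RGB all receive the comparison result and alpha is forced to 1.0. | ||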
| 200 | value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||
| 201 | } else { | ||
| 202 | value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; | ||
| 203 | } | ||
| 204 | v.F(dest_reg, value); | ||
| 205 | ++dest_reg; | ||
| 206 | } | ||
| 207 | if (tex.sparse_pred != IR::Pred::PT) { | ||
| 208 | v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | } // Anonymous namespace | ||
| 212 | |||
| 213 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 214 | union { | ||
| 215 | u64 raw; | ||
| 216 | BitField<54, 1, u64> aoffi; | ||
| 217 | BitField<55, 3, Blod> blod; | ||
| 218 | BitField<58, 1, u64> lc; | ||
| 219 | BitField<36, 13, u64> cbuf_offset; | ||
| 220 | } const tex{insn}; | ||
| 221 | |||
| 222 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); | ||
| 223 | } | ||
| 224 | |||
| 225 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 226 | union { | ||
| 227 | u64 raw; | ||
| 228 | BitField<36, 1, u64> aoffi; | ||
| 229 | BitField<37, 3, Blod> blod; | ||
| 230 | BitField<40, 1, u64> lc; | ||
| 231 | } const tex{insn}; | ||
| 232 | |||
| 233 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | union Encoding { | ||
| 20 | u64 raw; | ||
| 21 | BitField<59, 1, Precision> precision; | ||
| 22 | BitField<53, 4, u64> encoding; | ||
| 23 | BitField<49, 1, u64> nodep; | ||
| 24 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 26 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 27 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 28 | BitField<36, 13, u64> cbuf_offset; | ||
| 29 | BitField<50, 3, u64> swizzle; | ||
| 30 | }; | ||
| 31 | |||
| 32 | constexpr unsigned R = 1; | ||
| 33 | constexpr unsigned G = 2; | ||
| 34 | constexpr unsigned B = 4; | ||
| 35 | constexpr unsigned A = 8; | ||
| 36 | |||
| 37 | constexpr std::array RG_LUT{ | ||
| 38 | R, // | ||
| 39 | G, // | ||
| 40 | B, // | ||
| 41 | A, // | ||
| 42 | R | G, // | ||
| 43 | R | A, // | ||
| 44 | G | A, // | ||
| 45 | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | constexpr std::array RGBA_LUT{ | ||
| 49 | R | G | B, // | ||
| 50 | R | G | A, // | ||
| 51 | R | B | A, // | ||
| 52 | G | B | A, // | ||
| 53 | R | G | B | A, // | ||
| 54 | }; | ||
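| | // When dest_reg_b is RZ only dest_reg_a is written, so the 3-bit swizzle selects at most two components (RG_LUT); otherwise it selects three or four (RGBA_LUT). | ||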
| 55 | |||
| 56 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 57 | if (!IR::IsAligned(reg, alignment)) { | ||
| 58 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | IR::Value Composite(TranslatorVisitor& v, Args... regs) { | ||
| 64 | return v.ir.CompositeConstruct(v.F(regs)...); | ||
| 65 | } | ||
| 66 | |||
| 67 | IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { | ||
| 68 | return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding texs{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; | ||
| 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 75 | const IR::Reg reg_a{texs.src_reg_a}; | ||
| 76 | const IR::Reg reg_b{texs.src_reg_b}; | ||
| 77 | IR::TextureInstInfo info{}; | ||
| 78 | if (texs.precision == Precision::F16) { | ||
| 79 | info.relaxed_precision.Assign(1); | ||
| 80 | } | ||
| 81 | switch (texs.encoding) { | ||
| 82 | case 0: // 1D.LZ | ||
| 83 | info.type.Assign(TextureType::Color1D); | ||
| 84 | return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); | ||
| 85 | case 1: // 2D | ||
| 86 | info.type.Assign(TextureType::Color2D); | ||
| 87 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); | ||
| 88 | case 2: // 2D.LZ | ||
| 89 | info.type.Assign(TextureType::Color2D); | ||
| 90 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); | ||
| 91 | case 3: // 2D.LL | ||
| 92 | CheckAlignment(reg_a, 2); | ||
| 93 | info.type.Assign(TextureType::Color2D); | ||
| 94 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, | ||
| 95 | info); | ||
| 96 | case 4: // 2D.DC | ||
| 97 | CheckAlignment(reg_a, 2); | ||
| 98 | info.type.Assign(TextureType::Color2D); | ||
| 99 | info.is_depth.Assign(1); | ||
| 100 | return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 101 | {}, {}, {}, info); | ||
| 102 | case 5: // 2D.LL.DC | ||
| 103 | CheckAlignment(reg_a, 2); | ||
| 104 | CheckAlignment(reg_b, 2); | ||
| 105 | info.type.Assign(TextureType::Color2D); | ||
| 106 | info.is_depth.Assign(1); | ||
| 107 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), | ||
| 108 | v.F(reg_b + 1), v.F(reg_b), {}, info); | ||
| 109 | case 6: // 2D.LZ.DC | ||
| 110 | CheckAlignment(reg_a, 2); | ||
| 111 | info.type.Assign(TextureType::Color2D); | ||
| 112 | info.is_depth.Assign(1); | ||
| 113 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 114 | zero, {}, info); | ||
| 115 | case 7: // ARRAY_2D | ||
| 116 | CheckAlignment(reg_a, 2); | ||
| 117 | info.type.Assign(TextureType::ColorArray2D); | ||
| 118 | return v.ir.ImageSampleImplicitLod( | ||
| 119 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 120 | {}, {}, {}, info); | ||
| 121 | case 8: // ARRAY_2D.LZ | ||
| 122 | CheckAlignment(reg_a, 2); | ||
| 123 | info.type.Assign(TextureType::ColorArray2D); | ||
| 124 | return v.ir.ImageSampleExplicitLod( | ||
| 125 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 126 | zero, {}, info); | ||
| 127 | case 9: // ARRAY_2D.LZ.DC | ||
| 128 | CheckAlignment(reg_a, 2); | ||
| 129 | CheckAlignment(reg_b, 2); | ||
| 130 | info.type.Assign(TextureType::ColorArray2D); | ||
| 131 | info.is_depth.Assign(1); | ||
| 132 | return v.ir.ImageSampleDrefExplicitLod( | ||
| 133 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 134 | v.F(reg_b + 1), zero, {}, info); | ||
| 135 | case 10: // 3D | ||
| 136 | CheckAlignment(reg_a, 2); | ||
| 137 | info.type.Assign(TextureType::Color3D); | ||
| 138 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 139 | {}, info); | ||
| 140 | case 11: // 3D.LZ | ||
| 141 | CheckAlignment(reg_a, 2); | ||
| 142 | info.type.Assign(TextureType::Color3D); | ||
| 143 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, | ||
| 144 | info); | ||
| 145 | case 12: // CUBE | ||
| 146 | CheckAlignment(reg_a, 2); | ||
| 147 | info.type.Assign(TextureType::ColorCube); | ||
| 148 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 149 | {}, info); | ||
| 150 | case 13: // CUBE.LL | ||
| 151 | CheckAlignment(reg_a, 2); | ||
| 152 | CheckAlignment(reg_b, 2); | ||
| 153 | info.type.Assign(TextureType::ColorCube); | ||
| 154 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), | ||
| 155 | v.F(reg_b + 1), {}, info); | ||
| 156 | default: | ||
| 157 | throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | unsigned Swizzle(u64 insn) { | ||
| 162 | const Encoding texs{insn}; | ||
| 163 | const size_t encoding{texs.swizzle}; | ||
| 164 | if (texs.dest_reg_b == IR::Reg::RZ) { | ||
| 165 | if (encoding >= RG_LUT.size()) { | ||
| 166 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 167 | } | ||
| 168 | return RG_LUT[encoding]; | ||
| 169 | } else { | ||
| 170 | if (encoding >= RGBA_LUT.size()) { | ||
| 171 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 172 | } | ||
| 173 | return RGBA_LUT[encoding]; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 178 | const bool is_shadow{sample.Type() == IR::Type::F32}; | ||
| 179 | if (is_shadow) { | ||
| 180 | const bool is_alpha{component == 3}; | ||
| 181 | return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; | ||
| 182 | } else { | ||
| 183 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 188 | const Encoding texs{insn}; | ||
| 189 | switch (index) { | ||
| 190 | case 0: | ||
| 191 | return texs.dest_reg_a; | ||
| 192 | case 1: | ||
| 193 | CheckAlignment(texs.dest_reg_a, 2); | ||
| 194 | return texs.dest_reg_a + 1; | ||
| 195 | case 2: | ||
| 196 | return texs.dest_reg_b; | ||
| 197 | case 3: | ||
| 198 | CheckAlignment(texs.dest_reg_b, 2); | ||
| 199 | return texs.dest_reg_b + 1; | ||
| 200 | } | ||
| 201 | throw LogicError("Invalid store index {}", index); | ||
| 202 | } | ||
| 203 | |||
| 204 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 205 | const unsigned swizzle{Swizzle(insn)}; | ||
| 206 | unsigned store_index{0}; | ||
| 207 | for (unsigned component = 0; component < 4; ++component) { | ||
| 208 | if (((swizzle >> component) & 1) == 0) { | ||
| 209 | continue; | ||
| 210 | } | ||
| 211 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 212 | v.F(dest, Extract(v, sample, component)); | ||
| 213 | ++store_index; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 218 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 219 | } | ||
| 220 | |||
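| | // F16 results are packed two components per destination register with PackHalf2x16; a lone trailing component is paired with zero. | ||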
| 221 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 222 | const unsigned swizzle{Swizzle(insn)}; | ||
| 223 | unsigned store_index{0}; | ||
| 224 | std::array<IR::F32, 4> swizzled; | ||
| 225 | for (unsigned component = 0; component < 4; ++component) { | ||
| 226 | if (((swizzle >> component) & 1) == 0) { | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | swizzled[store_index] = Extract(v, sample, component); | ||
| 230 | ++store_index; | ||
| 231 | } | ||
| 232 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 233 | const Encoding texs{insn}; | ||
| 234 | switch (store_index) { | ||
| 235 | case 1: | ||
| 236 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 237 | break; | ||
| 238 | case 2: | ||
| 239 | case 3: | ||
| 240 | case 4: | ||
| 241 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 242 | switch (store_index) { | ||
| 243 | case 2: | ||
| 244 | break; | ||
| 245 | case 3: | ||
| 246 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 247 | break; | ||
| 248 | case 4: | ||
| 249 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } // Anonymous namespace | ||
| 256 | |||
| 257 | void TranslatorVisitor::TEXS(u64 insn) { | ||
| 258 | const IR::Value sample{Sample(*this, insn)}; | ||
| 259 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 260 | Store32(*this, insn, sample); | ||
| 261 | } else { | ||
| 262 | Store16(*this, insn, sample); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class OffsetType : u64 { | ||
| 27 | None = 0, | ||
| 28 | AOFFI, | ||
| 29 | PTP, | ||
| 30 | Invalid, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class ComponentType : u64 { | ||
| 34 | R = 0, | ||
| 35 | G = 1, | ||
| 36 | B = 2, | ||
| 37 | A = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | Shader::TextureType GetType(TextureType type) { | ||
| 41 | switch (type) { | ||
| 42 | case TextureType::_1D: | ||
| 43 | return Shader::TextureType::Color1D; | ||
| 44 | case TextureType::ARRAY_1D: | ||
| 45 | return Shader::TextureType::ColorArray1D; | ||
| 46 | case TextureType::_2D: | ||
| 47 | return Shader::TextureType::Color2D; | ||
| 48 | case TextureType::ARRAY_2D: | ||
| 49 | return Shader::TextureType::ColorArray2D; | ||
| 50 | case TextureType::_3D: | ||
| 51 | return Shader::TextureType::Color3D; | ||
| 52 | case TextureType::ARRAY_3D: | ||
| 53 | throw NotImplementedException("3D array texture type"); | ||
| 54 | case TextureType::CUBE: | ||
| 55 | return Shader::TextureType::ColorCube; | ||
| 56 | case TextureType::ARRAY_CUBE: | ||
| 57 | return Shader::TextureType::ColorArrayCube; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 60 | } | ||
| 61 | |||
| 62 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 63 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 64 | switch (type) { | ||
| 65 | case TextureType::_1D: | ||
| 66 | return v.F(reg); | ||
| 67 | case TextureType::ARRAY_1D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 69 | case TextureType::_2D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 71 | case TextureType::ARRAY_2D: | ||
| 72 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 73 | case TextureType::_3D: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_3D: | ||
| 76 | throw NotImplementedException("3D array texture type"); | ||
| 77 | case TextureType::CUBE: | ||
| 78 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 79 | case TextureType::ARRAY_CUBE: | ||
| 80 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 81 | } | ||
| 82 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 86 | const IR::U32 value{v.X(reg++)}; | ||
| 87 | switch (type) { | ||
| 88 | case TextureType::_1D: | ||
| 89 | case TextureType::ARRAY_1D: | ||
| 90 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); | ||
| 91 | case TextureType::_2D: | ||
| 92 | case TextureType::ARRAY_2D: | ||
| 93 | return v.ir.CompositeConstruct( | ||
| 94 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 95 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 96 | case TextureType::_3D: | ||
| 97 | case TextureType::ARRAY_3D: | ||
| 98 | return v.ir.CompositeConstruct( | ||
| 99 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 100 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 101 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); | ||
| 102 | case TextureType::CUBE: | ||
| 103 | case TextureType::ARRAY_CUBE: | ||
| 104 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 105 | } | ||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 107 | } | ||
| 108 | |||
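| | // PTP offsets: each of the two source registers packs four signed 6-bit texel offsets at 8-bit strides. | ||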
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||
| 110 | const IR::U32 value1{v.X(reg++)}; | ||
| 111 | const IR::U32 value2{v.X(reg++)}; | ||
| 112 | const IR::U32 bitsize{v.ir.Imm32(6)}; | ||
| 113 | const auto make_vector{[&v, &bitsize](const IR::U32& value) { | ||
| 114 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), | ||
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); | ||
| 118 | }}; | ||
| 119 | return {make_vector(value1), make_vector(value2)}; | ||
| 120 | } | ||
| 121 | |||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||
| 123 | bool is_bindless) { | ||
| 124 | union { | ||
| 125 | u64 raw; | ||
| 126 | BitField<35, 1, u64> ndv; | ||
| 127 | BitField<49, 1, u64> nodep; | ||
| 128 | BitField<50, 1, u64> dc; | ||
| 129 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 130 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 131 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 132 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 133 | BitField<28, 3, TextureType> type; | ||
| 134 | BitField<31, 4, u64> mask; | ||
| 135 | BitField<36, 13, u64> cbuf_offset; | ||
| 136 | } const tld4{insn}; | ||
| 137 | |||
| 138 | const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; | ||
| 139 | |||
| 140 | IR::Reg meta_reg{tld4.meta_reg}; | ||
| 141 | IR::Value handle; | ||
| 142 | IR::Value offset; | ||
| 143 | IR::Value offset2; | ||
| 144 | IR::F32 dref; | ||
| 145 | if (!is_bindless) { | ||
| 146 | handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); | ||
| 147 | } else { | ||
| 148 | handle = v.X(meta_reg++); | ||
| 149 | } | ||
| 150 | switch (offset_type) { | ||
| 151 | case OffsetType::None: | ||
| 152 | break; | ||
| 153 | case OffsetType::AOFFI: | ||
| 154 | offset = MakeOffset(v, meta_reg, tld4.type); | ||
| 155 | break; | ||
| 156 | case OffsetType::PTP: | ||
| 157 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | ||
| 158 | break; | ||
| 159 | default: | ||
| 160 | throw NotImplementedException("Invalid offset type {}", offset_type); | ||
| 161 | } | ||
| 162 | if (tld4.dc != 0) { | ||
| 163 | dref = v.F(meta_reg++); | ||
| 164 | } | ||
| 165 | IR::TextureInstInfo info{}; | ||
| 166 | info.type.Assign(GetType(tld4.type)); | ||
| 167 | info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); | ||
| 168 | info.gather_component.Assign(static_cast<u32>(component_type)); | ||
| 169 | const IR::Value sample{[&] { | ||
| 170 | if (tld4.dc == 0) { | ||
| 171 | return v.ir.ImageGather(handle, coords, offset, offset2, info); | ||
| 172 | } | ||
| 173 | return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); | ||
| 174 | }()}; | ||
| 175 | |||
| 176 | IR::Reg dest_reg{tld4.dest_reg}; | ||
| 177 | for (size_t element = 0; element < 4; ++element) { | ||
| 178 | if (((tld4.mask >> element) & 1) == 0) { | ||
| 179 | continue; | ||
| 180 | } | ||
| 181 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 182 | ++dest_reg; | ||
| 183 | } | ||
| 184 | if (tld4.sparse_pred != IR::Pred::PT) { | ||
| 185 | v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } // Anonymous namespace | ||
| 189 | |||
| 190 | void TranslatorVisitor::TLD4(u64 insn) { | ||
| 191 | union { | ||
| 192 | u64 raw; | ||
| 193 | BitField<56, 2, ComponentType> component; | ||
| 194 | BitField<54, 2, OffsetType> offset; | ||
| 195 | } const tld4{insn}; | ||
| 196 | Impl(*this, insn, tld4.component, tld4.offset, false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void TranslatorVisitor::TLD4_b(u64 insn) { | ||
| 200 | union { | ||
| 201 | u64 raw; | ||
| 202 | BitField<38, 2, ComponentType> component; | ||
| 203 | BitField<36, 2, OffsetType> offset; | ||
| 204 | } const tld4{insn}; | ||
| 205 | Impl(*this, insn, tld4.component, tld4.offset, true); | ||
| 206 | } | ||
| 207 | |||
| 208 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F32, | ||
| 16 | F16, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class ComponentType : u64 { | ||
| 20 | R = 0, | ||
| 21 | G = 1, | ||
| 22 | B = 2, | ||
| 23 | A = 3, | ||
| 24 | }; | ||
| 25 | |||
| 26 | union Encoding { | ||
| 27 | u64 raw; | ||
| 28 | BitField<55, 1, Precision> precision; | ||
| 29 | BitField<52, 2, ComponentType> component_type; | ||
| 30 | BitField<51, 1, u64> aoffi; | ||
| 31 | BitField<50, 1, u64> dc; | ||
| 32 | BitField<49, 1, u64> nodep; | ||
| 33 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 34 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 35 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 36 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 37 | BitField<36, 13, u64> cbuf_offset; | ||
| 38 | }; | ||
| 39 | |||
| 40 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 41 | if (!IR::IsAligned(reg, alignment)) { | ||
| 42 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 47 | const IR::U32 value{v.X(reg)}; | ||
| 48 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 49 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 50 | } | ||
| 51 | |||
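| | // TLD4S is the scalar gather form: always a 2D gather through a bound constant-buffer handle, with optional AOFFI offset and depth compare. | ||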
| 52 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 53 | const Encoding tld4s{insn}; | ||
| 54 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; | ||
| 55 | const IR::Reg reg_a{tld4s.src_reg_a}; | ||
| 56 | const IR::Reg reg_b{tld4s.src_reg_b}; | ||
| 57 | IR::TextureInstInfo info{}; | ||
| 58 | if (tld4s.precision == Precision::F16) { | ||
| 59 | info.relaxed_precision.Assign(1); | ||
| 60 | } | ||
| 61 | info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); | ||
| 62 | info.type.Assign(Shader::TextureType::Color2D); | ||
| 63 | info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); | ||
| 64 | IR::Value coords; | ||
| 65 | if (tld4s.aoffi != 0) { | ||
| 66 | CheckAlignment(reg_a, 2); | ||
| 67 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 68 | const IR::Value offset{MakeOffset(v, reg_b)}; | ||
| 69 | if (tld4s.dc != 0) { | ||
| 70 | CheckAlignment(reg_b, 2); | ||
| 71 | const IR::F32 dref{v.F(reg_b + 1)}; | ||
| 72 | return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); | ||
| 73 | } | ||
| 74 | return v.ir.ImageGather(handle, coords, offset, {}, info); | ||
| 75 | } | ||
| 76 | if (tld4s.dc != 0) { | ||
| 77 | CheckAlignment(reg_a, 2); | ||
| 78 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 79 | const IR::F32 dref{v.F(reg_b)}; | ||
| 80 | return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); | ||
| 81 | } | ||
| 82 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); | ||
| 83 | return v.ir.ImageGather(handle, coords, {}, {}, info); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Reg RegStoreComponent32(u64 insn, size_t index) { | ||
| 87 | const Encoding tlds4{insn}; | ||
| 88 | switch (index) { | ||
| 89 | case 0: | ||
| 90 | return tlds4.dest_reg_a; | ||
| 91 | case 1: | ||
| 92 | CheckAlignment(tlds4.dest_reg_a, 2); | ||
| 93 | return tlds4.dest_reg_a + 1; | ||
| 94 | case 2: | ||
| 95 | return tlds4.dest_reg_b; | ||
| 96 | case 3: | ||
| 97 | CheckAlignment(tlds4.dest_reg_b, 2); | ||
| 98 | return tlds4.dest_reg_b + 1; | ||
| 99 | } | ||
| 100 | throw LogicError("Invalid store index {}", index); | ||
| 101 | } | ||
| 102 | |||
| 103 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 104 | for (size_t component = 0; component < 4; ++component) { | ||
| 105 | const IR::Reg dest{RegStoreComponent32(insn, component)}; | ||
| 106 | v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 111 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 112 | } | ||
| 113 | |||
| 114 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 115 | std::array<IR::F32, 4> swizzled; | ||
| 116 | for (size_t component = 0; component < 4; ++component) { | ||
| 117 | swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 118 | } | ||
| 119 | const Encoding tld4s{insn}; | ||
| 120 | v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 121 | v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 122 | } | ||
| 123 | } // Anonymous namespace | ||
| 124 | |||
| 125 | void TranslatorVisitor::TLD4S(u64 insn) { | ||
| 126 | const IR::Value sample{Sample(*this, insn)}; | ||
| 127 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 128 | Store32(*this, insn, sample); | ||
| 129 | } else { | ||
| 130 | Store16(*this, insn, sample); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { | ||
| 49 | const IR::U32 value{v.X(reg)}; | ||
| 50 | const u32 base{has_lod_clamp ? 12U : 16U}; | ||
| 51 | return v.ir.CompositeConstruct( | ||
| 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), | ||
| 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 57 | union { | ||
| 58 | u64 raw; | ||
| 59 | BitField<49, 1, u64> nodep; | ||
| 60 | BitField<35, 1, u64> aoffi; | ||
| 61 | BitField<50, 1, u64> lc; | ||
| 62 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 65 | BitField<20, 8, IR::Reg> derivate_reg; | ||
| 66 | BitField<28, 3, TextureType> type; | ||
| 67 | BitField<31, 4, u64> mask; | ||
| 68 | BitField<36, 13, u64> cbuf_offset; | ||
| 69 | } const txd{insn}; | ||
| 70 | |||
| 71 | const bool has_lod_clamp{txd.lc != 0}; | ||
| 72 | if (has_lod_clamp) { | ||
| 73 | throw NotImplementedException("TXD.LC - CLAMP is not implemented"); | ||
| 74 | } | ||
| 75 | |||
| 76 | IR::Value coords; | ||
| 77 | u32 num_derivates{}; | ||
| 78 | IR::Reg base_reg{txd.coord_reg}; | ||
| 79 | IR::Reg last_reg; | ||
| 80 | IR::Value handle; | ||
| 81 | if (is_bindless) { | ||
| 82 | handle = v.X(base_reg++); | ||
| 83 | } else { | ||
| 84 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 85 | } | ||
| 86 | |||
| 87 | const auto read_array{[&]() -> IR::F32 { | ||
| 88 | const IR::U32 base{v.ir.Imm32(0)}; | ||
| 89 | const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; | ||
| 90 | const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; | ||
| 91 | return v.ir.ConvertUToF(32, 16, array_index); | ||
| 92 | }}; | ||
| 93 | switch (txd.type) { | ||
| 94 | case TextureType::_1D: { | ||
| 95 | coords = v.F(base_reg); | ||
| 96 | num_derivates = 1; | ||
| 97 | last_reg = base_reg + 1; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case TextureType::ARRAY_1D: { | ||
| 101 | last_reg = base_reg + 1; | ||
| 102 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); | ||
| 103 | num_derivates = 1; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | case TextureType::_2D: { | ||
| 107 | last_reg = base_reg + 2; | ||
| 108 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); | ||
| 109 | num_derivates = 2; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case TextureType::ARRAY_2D: { | ||
| 113 | last_reg = base_reg + 2; | ||
| 114 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); | ||
| 115 | num_derivates = 2; | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | default: | ||
| 119 | throw NotImplementedException("Invalid texture type"); | ||
| 120 | } | ||
| 121 | |||
| 122 | const IR::Reg derivate_reg{txd.derivate_reg}; | ||
| 123 | IR::Value derivates; | ||
| 124 | switch (num_derivates) { | ||
| 125 | case 1: { | ||
| 126 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case 2: { | ||
| 130 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), | ||
| 131 | v.F(derivate_reg + 2), v.F(derivate_reg + 3)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | default: | ||
| 135 | throw NotImplementedException("Invalid derivative count"); | ||
| 136 | } | ||
| 137 | |||
| 138 | IR::Value offset; | ||
| 139 | if (txd.aoffi != 0) { | ||
| 140 | offset = MakeOffset(v, last_reg, has_lod_clamp); | ||
| 141 | } | ||
| 142 | |||
| 143 | IR::F32 lod_clamp; | ||
| 144 | if (has_lod_clamp) { | ||
| 145 | // The LOD clamp is a 4.8 fixed-point value that has to be converted to float. | ||
| 146 | // A fixed-point value converts as float(value) / float(1 << fraction_bits); | ||
| 147 | // in this case fraction_bits is 8. | ||
| 148 | const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; | ||
| 149 | const IR::F32 fixp_lc{v.ir.ConvertUToF( | ||
| 150 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; | ||
| 151 | lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); | ||
| 152 | } | ||
| 153 | |||
| 154 | IR::TextureInstInfo info{}; | ||
| 155 | info.type.Assign(GetType(txd.type)); | ||
| 156 | info.num_derivates.Assign(num_derivates); | ||
| 157 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | ||
| 158 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; | ||
| 159 | |||
| 160 | IR::Reg dest_reg{txd.dest_reg}; | ||
| 161 | for (size_t element = 0; element < 4; ++element) { | ||
| 162 | if (((txd.mask >> element) & 1) == 0) { | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 166 | ++dest_reg; | ||
| 167 | } | ||
| 168 | if (txd.sparse_pred != IR::Pred::PT) { | ||
| 169 | v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | } // Anonymous namespace | ||
| 173 | |||
| 174 | void TranslatorVisitor::TXD(u64 insn) { | ||
| 175 | Impl(*this, insn, false); | ||
| 176 | } | ||
| 177 | |||
| 178 | void TranslatorVisitor::TXD_b(u64 insn) { | ||
| 179 | Impl(*this, insn, true); | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
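Note on the offset decode in texture_gradient.cpp above: MakeOffset packs two signed 4-bit texel offsets into one source register, starting at bit 12 when a LOD clamp is present and bit 16 otherwise. A minimal host-side sketch of the sign-extending extract it emits (the helper name and sample value are illustrative, not part of the recompiler):

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors IR BitFieldExtract(value, offset, count, is_signed=true) on the host:
// shift the field to the top, then arithmetic-shift back to replicate the sign.
int32_t SignedExtract(uint32_t value, uint32_t offset, uint32_t count) {
    return static_cast<int32_t>(value << (32 - offset - count)) >> (32 - count);
}

int main() {
    const uint32_t reg = 0xFu << 16; // x offset nibble = 0b1111, no LOD clamp
    std::printf("x=%d y=%d\n", SignedExtract(reg, 16, 4), SignedExtract(reg, 20, 4));
    // Prints "x=-1 y=0": the 4-bit pattern 0b1111 sign-extends to -1.
}
```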
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | const auto read_array{ | ||
| 50 | [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.X(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 73 | const IR::U32 value{v.X(reg++)}; | ||
| 74 | switch (type) { | ||
| 75 | case TextureType::_1D: | ||
| 76 | case TextureType::ARRAY_1D: | ||
| 77 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 78 | case TextureType::_2D: | ||
| 79 | case TextureType::ARRAY_2D: | ||
| 80 | return v.ir.CompositeConstruct( | ||
| 81 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 82 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 83 | case TextureType::_3D: | ||
| 84 | case TextureType::ARRAY_3D: | ||
| 85 | return v.ir.CompositeConstruct( | ||
| 86 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 87 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 88 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 89 | case TextureType::CUBE: | ||
| 90 | case TextureType::ARRAY_CUBE: | ||
| 91 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 92 | } | ||
| 93 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 94 | } | ||
| 95 | |||
| 96 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 97 | union { | ||
| 98 | u64 raw; | ||
| 99 | BitField<49, 1, u64> nodep; | ||
| 100 | BitField<55, 1, u64> lod; | ||
| 101 | BitField<50, 1, u64> multisample; | ||
| 102 | BitField<35, 1, u64> aoffi; | ||
| 103 | BitField<54, 1, u64> clamp; | ||
| 104 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 105 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 106 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 107 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 108 | BitField<28, 3, TextureType> type; | ||
| 109 | BitField<31, 4, u64> mask; | ||
| 110 | BitField<36, 13, u64> cbuf_offset; | ||
| 111 | } const tld{insn}; | ||
| 112 | |||
| 113 | const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; | ||
| 114 | |||
| 115 | IR::Reg meta_reg{tld.meta_reg}; | ||
| 116 | IR::Value handle; | ||
| 117 | IR::Value offset; | ||
| 118 | IR::U32 lod; | ||
| 119 | IR::U32 multisample; | ||
| 120 | if (is_bindless) { | ||
| 121 | handle = v.X(meta_reg++); | ||
| 122 | } else { | ||
| 123 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 124 | } | ||
| 125 | if (tld.lod != 0) { | ||
| 126 | lod = v.X(meta_reg++); | ||
| 127 | } else { | ||
| 128 | lod = v.ir.Imm32(0U); | ||
| 129 | } | ||
| 130 | if (tld.aoffi != 0) { | ||
| 131 | offset = MakeOffset(v, meta_reg, tld.type); | ||
| 132 | } | ||
| 133 | if (tld.multisample != 0) { | ||
| 134 | multisample = v.X(meta_reg++); | ||
| 135 | } | ||
| 136 | if (tld.clamp != 0) { | ||
| 137 | throw NotImplementedException("TLD.CL - CLAMP is not implemented"); | ||
| 138 | } | ||
| 139 | IR::TextureInstInfo info{}; | ||
| 140 | info.type.Assign(GetType(tld.type)); | ||
| 141 | const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; | ||
| 142 | |||
| 143 | IR::Reg dest_reg{tld.dest_reg}; | ||
| 144 | for (size_t element = 0; element < 4; ++element) { | ||
| 145 | if (((tld.mask >> element) & 1) == 0) { | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 149 | ++dest_reg; | ||
| 150 | } | ||
| 151 | if (tld.sparse_pred != IR::Pred::PT) { | ||
| 152 | v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } // Anonymous namespace | ||
| 156 | |||
| 157 | void TranslatorVisitor::TLD(u64 insn) { | ||
| 158 | Impl(*this, insn, false); | ||
| 159 | } | ||
| 160 | |||
| 161 | void TranslatorVisitor::TLD_b(u64 insn) { | ||
| 162 | Impl(*this, insn, true); | ||
| 163 | } | ||
| 164 | |||
| 165 | } // namespace Shader::Maxwell | ||
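For reference, the register layout MakeCoords consumes for a non-bindless TLD on an ARRAY_2D texture: the first register carries the array layer in its low 16 bits, and the texel coordinates follow in the next registers. A small sketch with made-up values:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    // Rn, Rn+1, Rn+2 for TextureType::ARRAY_2D (values are illustrative).
    const uint32_t regs[3] = {0x00000005u, 12u, 34u}; // layer 5, x = 12, y = 34
    const uint32_t layer = regs[0] & 0xFFFFu;         // BitFieldExtract(Rn, 0, 16)
    std::printf("fetch texel (%u, %u) from layer %u\n", regs[1], regs[2], layer);
}
```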
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | |||
| @@ -0,0 +1,242 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | constexpr unsigned R = 1; | ||
| 20 | constexpr unsigned G = 2; | ||
| 21 | constexpr unsigned B = 4; | ||
| 22 | constexpr unsigned A = 8; | ||
| 23 | |||
| 24 | constexpr std::array RG_LUT{ | ||
| 25 | R, // | ||
| 26 | G, // | ||
| 27 | B, // | ||
| 28 | A, // | ||
| 29 | R | G, // | ||
| 30 | R | A, // | ||
| 31 | G | A, // | ||
| 32 | B | A, // | ||
| 33 | }; | ||
| 34 | |||
| 35 | constexpr std::array RGBA_LUT{ | ||
| 36 | R | G | B, // | ||
| 37 | R | G | A, // | ||
| 38 | R | B | A, // | ||
| 39 | G | B | A, // | ||
| 40 | R | G | B | A, // | ||
| 41 | }; | ||
| 42 | |||
| 43 | union Encoding { | ||
| 44 | u64 raw; | ||
| 45 | BitField<59, 1, Precision> precision; | ||
| 46 | BitField<54, 1, u64> aoffi; | ||
| 47 | BitField<53, 1, u64> lod; | ||
| 48 | BitField<55, 1, u64> ms; | ||
| 49 | BitField<49, 1, u64> nodep; | ||
| 50 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 52 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 53 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 54 | BitField<36, 13, u64> cbuf_offset; | ||
| 55 | BitField<50, 3, u64> swizzle; | ||
| 56 | BitField<53, 4, u64> encoding; | ||
| 57 | }; | ||
| 58 | |||
| 59 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 60 | if (!IR::IsAligned(reg, alignment)) { | ||
| 61 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 66 | const IR::U32 value{v.X(reg)}; | ||
| 67 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 68 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding tlds{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; | ||
| 74 | const IR::Reg reg_a{tlds.src_reg_a}; | ||
| 75 | const IR::Reg reg_b{tlds.src_reg_b}; | ||
| 76 | IR::Value coords; | ||
| 77 | IR::U32 lod{v.ir.Imm32(0U)}; | ||
| 78 | IR::Value offsets; | ||
| 79 | IR::U32 multisample; | ||
| 80 | Shader::TextureType texture_type{}; | ||
| 81 | switch (tlds.encoding) { | ||
| 82 | case 0: | ||
| 83 | texture_type = Shader::TextureType::Color1D; | ||
| 84 | coords = v.X(reg_a); | ||
| 85 | break; | ||
| 86 | case 1: | ||
| 87 | texture_type = Shader::TextureType::Color1D; | ||
| 88 | coords = v.X(reg_a); | ||
| 89 | lod = v.X(reg_b); | ||
| 90 | break; | ||
| 91 | case 2: | ||
| 92 | texture_type = Shader::TextureType::Color2D; | ||
| 93 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); | ||
| 94 | break; | ||
| 95 | case 4: | ||
| 96 | CheckAlignment(reg_a, 2); | ||
| 97 | texture_type = Shader::TextureType::Color2D; | ||
| 98 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 99 | offsets = MakeOffset(v, reg_b); | ||
| 100 | break; | ||
| 101 | case 5: | ||
| 102 | CheckAlignment(reg_a, 2); | ||
| 103 | texture_type = Shader::TextureType::Color2D; | ||
| 104 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 105 | lod = v.X(reg_b); | ||
| 106 | break; | ||
| 107 | case 6: | ||
| 108 | CheckAlignment(reg_a, 2); | ||
| 109 | texture_type = Shader::TextureType::Color2D; | ||
| 110 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 111 | multisample = v.X(reg_b); | ||
| 112 | break; | ||
| 113 | case 7: | ||
| 114 | CheckAlignment(reg_a, 2); | ||
| 115 | texture_type = Shader::TextureType::Color3D; | ||
| 116 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); | ||
| 117 | break; | ||
| 118 | case 8: { | ||
| 119 | CheckAlignment(reg_b, 2); | ||
| 120 | const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; | ||
| 121 | texture_type = Shader::TextureType::ColorArray2D; | ||
| 122 | coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | case 12: | ||
| 126 | CheckAlignment(reg_a, 2); | ||
| 127 | CheckAlignment(reg_b, 2); | ||
| 128 | texture_type = Shader::TextureType::Color2D; | ||
| 129 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 130 | lod = v.X(reg_b); | ||
| 131 | offsets = MakeOffset(v, reg_b + 1); | ||
| 132 | break; | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); | ||
| 135 | } | ||
| 136 | IR::TextureInstInfo info{}; | ||
| 137 | if (tlds.precision == Precision::F16) { | ||
| 138 | info.relaxed_precision.Assign(1); | ||
| 139 | } | ||
| 140 | info.type.Assign(texture_type); | ||
| 141 | return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); | ||
| 142 | } | ||
| 143 | |||
| 144 | unsigned Swizzle(u64 insn) { | ||
| 145 | const Encoding tlds{insn}; | ||
| 146 | const size_t encoding{tlds.swizzle}; | ||
| 147 | if (tlds.dest_reg_b == IR::Reg::RZ) { | ||
| 148 | if (encoding >= RG_LUT.size()) { | ||
| 149 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 150 | } | ||
| 151 | return RG_LUT[encoding]; | ||
| 152 | } else { | ||
| 153 | if (encoding >= RGBA_LUT.size()) { | ||
| 154 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 155 | } | ||
| 156 | return RGBA_LUT[encoding]; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 161 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 162 | } | ||
| 163 | |||
| 164 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 165 | const Encoding tlds{insn}; | ||
| 166 | switch (index) { | ||
| 167 | case 0: | ||
| 168 | return tlds.dest_reg_a; | ||
| 169 | case 1: | ||
| 170 | CheckAlignment(tlds.dest_reg_a, 2); | ||
| 171 | return tlds.dest_reg_a + 1; | ||
| 172 | case 2: | ||
| 173 | return tlds.dest_reg_b; | ||
| 174 | case 3: | ||
| 175 | CheckAlignment(tlds.dest_reg_b, 2); | ||
| 176 | return tlds.dest_reg_b + 1; | ||
| 177 | } | ||
| 178 | throw LogicError("Invalid store index {}", index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 182 | const unsigned swizzle{Swizzle(insn)}; | ||
| 183 | unsigned store_index{0}; | ||
| 184 | for (unsigned component = 0; component < 4; ++component) { | ||
| 185 | if (((swizzle >> component) & 1) == 0) { | ||
| 186 | continue; | ||
| 187 | } | ||
| 188 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 189 | v.F(dest, Extract(v, sample, component)); | ||
| 190 | ++store_index; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 195 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 196 | } | ||
| 197 | |||
| 198 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 199 | const unsigned swizzle{Swizzle(insn)}; | ||
| 200 | unsigned store_index{0}; | ||
| 201 | std::array<IR::F32, 4> swizzled; | ||
| 202 | for (unsigned component = 0; component < 4; ++component) { | ||
| 203 | if (((swizzle >> component) & 1) == 0) { | ||
| 204 | continue; | ||
| 205 | } | ||
| 206 | swizzled[store_index] = Extract(v, sample, component); | ||
| 207 | ++store_index; | ||
| 208 | } | ||
| 209 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 210 | const Encoding tlds{insn}; | ||
| 211 | switch (store_index) { | ||
| 212 | case 1: | ||
| 213 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 214 | break; | ||
| 215 | case 2: | ||
| 216 | case 3: | ||
| 217 | case 4: | ||
| 218 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 219 | switch (store_index) { | ||
| 220 | case 2: | ||
| 221 | break; | ||
| 222 | case 3: | ||
| 223 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 224 | break; | ||
| 225 | case 4: | ||
| 226 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } // Anonymous namespace | ||
| 233 | |||
| 234 | void TranslatorVisitor::TLDS(u64 insn) { | ||
| 235 | const IR::Value sample{Sample(*this, insn)}; | ||
| 236 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 237 | Store32(*this, insn, sample); | ||
| 238 | } else { | ||
| 239 | Store16(*this, insn, sample); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | } // namespace Shader::Maxwell | ||
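The swizzle decode in texture_load_swizzled.cpp is worth a worked example: the 3-bit swizzle field indexes RG_LUT when only one destination register is in use (dest_reg_b is RZ) and RGBA_LUT otherwise, and the resulting bitmask drives which sample components get stored. A standalone sketch:

```cpp
#include <array>
#include <cstdio>

constexpr unsigned R = 1, G = 2, B = 4, A = 8;
constexpr std::array RG_LUT{R, G, B, A, R | G, R | A, G | A, B | A};

int main() {
    const unsigned mask = RG_LUT[4]; // swizzle encoding 4 selects R | G
    for (unsigned component = 0; component < 4; ++component) {
        if (((mask >> component) & 1) != 0) {
            std::printf("store component %u\n", component); // prints 0, then 1
        }
    }
}
```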
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | // The ISA reads an array component here, but high-level shading languages do not | ||
| 50 | // need it, so we drop this information. | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.F(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.F(reg + 1); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<49, 1, u64> nodep; | ||
| 76 | BitField<35, 1, u64> ndv; | ||
| 77 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 78 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 79 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 80 | BitField<28, 3, TextureType> type; | ||
| 81 | BitField<31, 4, u64> mask; | ||
| 82 | BitField<36, 13, u64> cbuf_offset; | ||
| 83 | } const tmml{insn}; | ||
| 84 | |||
| 85 | if ((tmml.mask & 0b1100) != 0) { | ||
| 86 | throw NotImplementedException("TMML BA results are not implemented"); | ||
| 87 | } | ||
| 88 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; | ||
| 89 | |||
| 90 | IR::U32 handle; | ||
| 91 | IR::Reg meta_reg{tmml.meta_reg}; | ||
| 92 | if (is_bindless) { | ||
| 93 | handle = v.X(meta_reg++); | ||
| 94 | } else { | ||
| 95 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 96 | } | ||
| 97 | IR::TextureInstInfo info{}; | ||
| 98 | info.type.Assign(GetType(tmml.type)); | ||
| 99 | const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; | ||
| 100 | |||
| 101 | IR::Reg dest_reg{tmml.dest_reg}; | ||
| 102 | for (size_t element = 0; element < 4; ++element) { | ||
| 103 | if (((tmml.mask >> element) & 1) == 0) { | ||
| 104 | continue; | ||
| 105 | } | ||
| 106 | IR::F32 value{v.ir.CompositeExtract(sample, element)}; | ||
| 107 | if (element < 2) { | ||
| 108 | IR::U32 casted_value; | ||
| 109 | if (element == 0) { | ||
| 110 | casted_value = v.ir.ConvertFToU(32, value); | ||
| 111 | } else { | ||
| 112 | casted_value = v.ir.ConvertFToS(16, value); | ||
| 113 | } | ||
| 114 | v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); | ||
| 115 | } else { | ||
| 116 | v.F(dest_reg, value); | ||
| 117 | } | ||
| 118 | ++dest_reg; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } // Anonymous namespace | ||
| 122 | |||
| 123 | void TranslatorVisitor::TMML(u64 insn) { | ||
| 124 | Impl(*this, insn, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void TranslatorVisitor::TMML_b(u64 insn) { | ||
| 128 | Impl(*this, insn, true); | ||
| 129 | } | ||
| 130 | |||
| 131 | } // namespace Shader::Maxwell | ||
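The shift in the TMML store loop converts the queried LOD into a fixed-point register value. A worked example of the element-0 path (ConvertFToU followed by a left shift of 8); the exact ISA rounding is not modeled here:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const float lod = 3.5f;
    const uint32_t packed = static_cast<uint32_t>(lod) << 8; // ConvertFToU(32), then << 8
    // 3.5 becomes 0x300, i.e. an 8.8 fixed-point 3.0; the fraction is
    // dropped by the float-to-unsigned conversion before the shift.
    std::printf("0x%X\n", packed);
}
```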
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Mode : u64 { | ||
| 15 | Dimension = 1, | ||
| 16 | TextureType = 2, | ||
| 17 | SamplePos = 5, | ||
| 18 | }; | ||
| 19 | |||
| 20 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { | ||
| 21 | switch (mode) { | ||
| 22 | case Mode::Dimension: { | ||
| 23 | const IR::U32 lod{v.X(src_reg)}; | ||
| 24 | return v.ir.ImageQueryDimension(handle, lod); | ||
| 25 | } | ||
| 26 | case Mode::TextureType: | ||
| 27 | case Mode::SamplePos: | ||
| 28 | default: | ||
| 29 | throw NotImplementedException("Mode {}", mode); | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { | ||
| 34 | union { | ||
| 35 | u64 raw; | ||
| 36 | BitField<49, 1, u64> nodep; | ||
| 37 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 38 | BitField<8, 8, IR::Reg> src_reg; | ||
| 39 | BitField<22, 3, Mode> mode; | ||
| 40 | BitField<31, 4, u64> mask; | ||
| 41 | } const txq{insn}; | ||
| 42 | |||
| 43 | IR::Reg src_reg{txq.src_reg}; | ||
| 44 | IR::U32 handle; | ||
| 45 | if (cbuf_offset) { | ||
| 46 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 47 | } else { | ||
| 48 | handle = v.X(src_reg); | ||
| 49 | ++src_reg; | ||
| 50 | } | ||
| 51 | const IR::Value query{Query(v, handle, txq.mode, src_reg)}; | ||
| 52 | IR::Reg dest_reg{txq.dest_reg}; | ||
| 53 | for (int element = 0; element < 4; ++element) { | ||
| 54 | if (((txq.mask >> element) & 1) == 0) { | ||
| 55 | continue; | ||
| 56 | } | ||
| 57 | v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); | ||
| 58 | ++dest_reg; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::TXQ(u64 insn) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<36, 13, u64> cbuf_offset; | ||
| 67 | } const txq{insn}; | ||
| 68 | |||
| 69 | Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::TXQ_b(u64 insn) { | ||
| 73 | Impl(*this, insn, std::nullopt); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | |||
| 10 | IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, | ||
| 11 | u32 selector, bool is_signed) { | ||
| 12 | switch (width) { | ||
| 13 | case VideoWidth::Byte: | ||
| 14 | case VideoWidth::Unknown: | ||
| 15 | return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); | ||
| 16 | case VideoWidth::Short: | ||
| 17 | return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); | ||
| 18 | case VideoWidth::Word: | ||
| 19 | return value; | ||
| 20 | default: | ||
| 21 | throw NotImplementedException("Unknown VideoWidth {}", width); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { | ||
| 26 | // Immediate operands are always in 16-bit format. | ||
| 27 | return is_immediate ? VideoWidth::Short : width; | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Maxwell | ||
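ExtractVideoOperandValue above emits IR; as a host-side model, the unsigned case reduces to a shift-and-mask of the selected byte or short (helper name and values are illustrative):

```cpp
#include <cstdint>
#include <cstdio>

uint32_t ExtractUnsigned(uint32_t value, unsigned width_bits, unsigned selector) {
    const uint32_t mask = width_bits == 32 ? ~0u : (1u << width_bits) - 1u;
    return (value >> (selector * width_bits)) & mask;
}

int main() {
    const uint32_t value = 0xAABBCCDDu;
    std::printf("byte 2  = 0x%X\n", ExtractUnsigned(value, 8, 2));  // 0xBB
    std::printf("short 1 = 0x%X\n", ExtractUnsigned(value, 16, 1)); // 0xAABB
}
```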
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | enum class VideoWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Unknown, | ||
| 14 | Short, | ||
| 15 | Word, | ||
| 16 | }; | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, | ||
| 19 | VideoWidth width, u32 selector, bool is_signed); | ||
| 20 | |||
| 21 | [[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class VideoMinMaxOps : u64 { | ||
| 13 | MRG_16H, | ||
| 14 | MRG_16L, | ||
| 15 | MRG_8B0, | ||
| 16 | MRG_8B2, | ||
| 17 | ACC, | ||
| 18 | MIN, | ||
| 19 | MAX, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, | ||
| 23 | VideoMinMaxOps op, bool is_signed) { | ||
| 24 | switch (op) { | ||
| 25 | case VideoMinMaxOps::MIN: | ||
| 26 | return ir.IMin(lhs, rhs, is_signed); | ||
| 27 | case VideoMinMaxOps::MAX: | ||
| 28 | return ir.IMax(lhs, rhs, is_signed); | ||
| 29 | default: | ||
| 30 | throw NotImplementedException("VMNMX op {}", op); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 35 | void TranslatorVisitor::VMNMX(u64 insn) { | ||
| 36 | union { | ||
| 37 | u64 raw; | ||
| 38 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 39 | BitField<20, 16, u64> src_b_imm; | ||
| 40 | BitField<28, 2, u64> src_b_selector; | ||
| 41 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 42 | BitField<36, 2, u64> src_a_selector; | ||
| 43 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> src_a_sign; | ||
| 46 | BitField<49, 1, u64> src_b_sign; | ||
| 47 | BitField<50, 1, u64> is_src_b_reg; | ||
| 48 | BitField<51, 3, VideoMinMaxOps> op; | ||
| 49 | BitField<54, 1, u64> dest_sign; | ||
| 50 | BitField<55, 1, u64> sat; | ||
| 51 | BitField<56, 1, u64> mx; | ||
| 52 | } const vmnmx{insn}; | ||
| 53 | |||
| 54 | if (vmnmx.cc != 0) { | ||
| 55 | throw NotImplementedException("VMNMX CC"); | ||
| 56 | } | ||
| 57 | if (vmnmx.sat != 0) { | ||
| 58 | throw NotImplementedException("VMNMX SAT"); | ||
| 59 | } | ||
| 60 | // Selectors were shown to default to 2 in unit tests | ||
| 61 | if (vmnmx.src_a_selector != 2) { | ||
| 62 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); | ||
| 63 | } | ||
| 64 | if (vmnmx.src_b_selector != 2) { | ||
| 65 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); | ||
| 66 | } | ||
| 67 | if (vmnmx.src_a_width != VideoWidth::Word) { | ||
| 68 | throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); | ||
| 69 | } | ||
| 70 | |||
| 71 | const bool is_b_imm{vmnmx.is_src_b_reg == 0}; | ||
| 72 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 73 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; | ||
| 74 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 75 | |||
| 76 | const VideoWidth a_width{vmnmx.src_a_width}; | ||
| 77 | const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; | ||
| 78 | |||
| 79 | const bool src_a_signed{vmnmx.src_a_sign != 0}; | ||
| 80 | const bool src_b_signed{vmnmx.src_b_sign != 0}; | ||
| 81 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; | ||
| 82 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; | ||
| 83 | |||
| 84 | // The first operation's signedness depends only on operand b's sign | ||
| 85 | const bool op_1_signed{src_b_signed}; | ||
| 86 | |||
| 87 | const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) | ||
| 88 | : ir.IMin(op_a, op_b, op_1_signed)}; | ||
| 89 | X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
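VMNMX is a two-stage reduction: a min or max of the extracted operands (selected by the mx bit), then a second min/max against the third source. A scalar reference of the signed word-width case, with made-up inputs:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const int32_t op_a = -4, op_b = 7, src_c = 2;
    const bool mx = true; // mx set: the first stage is a max, otherwise a min
    const int32_t stage1 = mx ? std::max(op_a, op_b) : std::min(op_a, op_b);
    const int32_t result = std::min(stage1, src_c); // second stage with op == MIN
    std::printf("%d\n", result); // max(-4, 7) = 7, then min(7, 2) = 2
}
```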
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::VMAD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<20, 16, u64> src_b_imm; | ||
| 16 | BitField<28, 2, u64> src_b_selector; | ||
| 17 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 18 | BitField<36, 2, u64> src_a_selector; | ||
| 19 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> src_a_sign; | ||
| 22 | BitField<49, 1, u64> src_b_sign; | ||
| 23 | BitField<50, 1, u64> is_src_b_reg; | ||
| 24 | BitField<51, 2, u64> scale; | ||
| 25 | BitField<53, 1, u64> src_c_neg; | ||
| 26 | BitField<54, 1, u64> src_a_neg; | ||
| 27 | BitField<55, 1, u64> sat; | ||
| 28 | } const vmad{insn}; | ||
| 29 | |||
| 30 | if (vmad.cc != 0) { | ||
| 31 | throw NotImplementedException("VMAD CC"); | ||
| 32 | } | ||
| 33 | if (vmad.sat != 0) { | ||
| 34 | throw NotImplementedException("VMAD SAT"); | ||
| 35 | } | ||
| 36 | if (vmad.scale != 0) { | ||
| 37 | throw NotImplementedException("VMAD SCALE"); | ||
| 38 | } | ||
| 39 | if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { | ||
| 40 | throw NotImplementedException("VMAD PO"); | ||
| 41 | } | ||
| 42 | if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { | ||
| 43 | throw NotImplementedException("VMAD NEG"); | ||
| 44 | } | ||
| 45 | const bool is_b_imm{vmad.is_src_b_reg == 0}; | ||
| 46 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 47 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; | ||
| 48 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 49 | |||
| 50 | const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; | ||
| 51 | // Immediate values can't have a selector | ||
| 52 | const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; | ||
| 53 | const VideoWidth a_width{vmad.src_a_width}; | ||
| 54 | const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; | ||
| 55 | |||
| 56 | const bool src_a_signed{vmad.src_a_sign != 0}; | ||
| 57 | const bool src_b_signed{vmad.src_b_sign != 0}; | ||
| 58 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 59 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 60 | |||
| 61 | X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
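Stripped of operand extraction, the VMAD lowering above is a plain integer multiply-add. A scalar reference with illustrative values (the immediate form zero-extends the 16-bit src_b field):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t op_a = 3u;   // word-width source A, selector 0
    const uint32_t op_b = 5u;   // 16-bit immediate source B
    const uint32_t src_c = 10u; // third source register
    std::printf("%u\n", op_a * op_b + src_c); // IAdd(IMul(op_a, op_b), src_c) = 25
}
```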
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class VsetpCompareOp : u64 { | ||
| 14 | False = 0, | ||
| 15 | LessThan, | ||
| 16 | Equal, | ||
| 17 | LessThanEqual, | ||
| 18 | GreaterThan = 16, | ||
| 19 | NotEqual, | ||
| 20 | GreaterThanEqual, | ||
| 21 | True, | ||
| 22 | }; | ||
| 23 | |||
| 24 | CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { | ||
| 25 | switch (op) { | ||
| 26 | case VsetpCompareOp::False: | ||
| 27 | return CompareOp::False; | ||
| 28 | case VsetpCompareOp::LessThan: | ||
| 29 | return CompareOp::LessThan; | ||
| 30 | case VsetpCompareOp::Equal: | ||
| 31 | return CompareOp::Equal; | ||
| 32 | case VsetpCompareOp::LessThanEqual: | ||
| 33 | return CompareOp::LessThanEqual; | ||
| 34 | case VsetpCompareOp::GreaterThan: | ||
| 35 | return CompareOp::GreaterThan; | ||
| 36 | case VsetpCompareOp::NotEqual: | ||
| 37 | return CompareOp::NotEqual; | ||
| 38 | case VsetpCompareOp::GreaterThanEqual: | ||
| 39 | return CompareOp::GreaterThanEqual; | ||
| 40 | case VsetpCompareOp::True: | ||
| 41 | return CompareOp::True; | ||
| 42 | default: | ||
| 43 | throw NotImplementedException("Invalid compare op {}", op); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::VSETP(u64 insn) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 52 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 53 | BitField<20, 16, u64> src_b_imm; | ||
| 54 | BitField<28, 2, u64> src_b_selector; | ||
| 55 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 56 | BitField<36, 2, u64> src_a_selector; | ||
| 57 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 58 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 59 | BitField<42, 1, u64> neg_bop_pred; | ||
| 60 | BitField<43, 5, VsetpCompareOp> compare_op; | ||
| 61 | BitField<45, 2, BooleanOp> bop; | ||
| 62 | BitField<48, 1, u64> src_a_sign; | ||
| 63 | BitField<49, 1, u64> src_b_sign; | ||
| 64 | BitField<50, 1, u64> is_src_b_reg; | ||
| 65 | } const vsetp{insn}; | ||
| 66 | |||
| 67 | const bool is_b_imm{vsetp.is_src_b_reg == 0}; | ||
| 68 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 69 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; | ||
| 70 | |||
| 71 | const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; | ||
| 72 | const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; | ||
| 73 | const VideoWidth a_width{vsetp.src_a_width}; | ||
| 74 | const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; | ||
| 75 | |||
| 76 | const bool src_a_signed{vsetp.src_a_sign != 0}; | ||
| 77 | const bool src_b_signed{vsetp.src_b_sign != 0}; | ||
| 78 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 79 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 80 | |||
| 81 | // The comparison's signedness depends only on operand b's sign | ||
| 82 | const bool compare_signed{src_b_signed}; | ||
| 83 | const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; | ||
| 84 | const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; | ||
| 85 | const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; | ||
| 86 | const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; | ||
| 87 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; | ||
| 88 | ir.SetPred(vsetp.dest_pred_a, result_a); | ||
| 89 | ir.SetPred(vsetp.dest_pred_b, result_b); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
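VSETP writes two predicates: the boolean op combines the comparison with an auxiliary predicate for the first destination, and the negated comparison with the same predicate for the second. A sketch of the BooleanOp::AND case with illustrative inputs:

```cpp
#include <cstdio>

int main() {
    const bool comparison = true; // e.g. op_a < op_b held
    const bool bop_pred = true;   // auxiliary predicate, possibly negated
    const bool result_a = comparison && bop_pred;  // -> dest_pred_a
    const bool result_b = !comparison && bop_pred; // -> dest_pred_b
    std::printf("Pa=%d Pb=%d\n", result_a, result_b); // Pa=1 Pb=0
}
```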
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class VoteOp : u64 { | ||
| 12 | ALL, | ||
| 13 | ANY, | ||
| 14 | EQ, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { | ||
| 18 | switch (vote_op) { | ||
| 19 | case VoteOp::ALL: | ||
| 20 | return ir.VoteAll(pred); | ||
| 21 | case VoteOp::ANY: | ||
| 22 | return ir.VoteAny(pred); | ||
| 23 | case VoteOp::EQ: | ||
| 24 | return ir.VoteEqual(pred); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid VOTE op {}", vote_op); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | void Vote(TranslatorVisitor& v, u64 insn) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<39, 3, IR::Pred> pred_a; | ||
| 35 | BitField<42, 1, u64> neg_pred_a; | ||
| 36 | BitField<45, 3, IR::Pred> pred_b; | ||
| 37 | BitField<48, 2, VoteOp> vote_op; | ||
| 38 | } const vote{insn}; | ||
| 39 | |||
| 40 | const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; | ||
| 41 | v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); | ||
| 42 | v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::VOTE(u64 insn) { | ||
| 47 | Vote(*this, insn); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::VOTE_vtg(u64) { | ||
| 51 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
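As a host-side model of the three vote reductions, treat the per-lane predicate as a 32-bit mask (values are illustrative; real hardware also honors the active-lane mask, which is omitted here):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t pred = 0x0000FFFFu;    // lower half of the warp votes true
    const bool all = pred == 0xFFFFFFFFu; // VoteAll
    const bool any = pred != 0u;          // VoteAny
    const bool eq = all || pred == 0u;    // VoteEqual: every lane agrees
    std::printf("ALL=%d ANY=%d EQ=%d\n", all, any, eq); // ALL=0 ANY=1 EQ=0
}
```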
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class ShuffleMode : u64 { | ||
| 14 | IDX, | ||
| 15 | UP, | ||
| 16 | DOWN, | ||
| 17 | BFLY, | ||
| 18 | }; | ||
| 19 | |||
| 20 | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||
| 21 | const IR::U32& index, const IR::U32& mask, | ||
| 22 | ShuffleMode shfl_op) { | ||
| 23 | const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||
| 24 | const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||
| 25 | switch (shfl_op) { | ||
| 26 | case ShuffleMode::IDX: | ||
| 27 | return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||
| 28 | case ShuffleMode::UP: | ||
| 29 | return ir.ShuffleUp(value, index, clamp, seg_mask); | ||
| 30 | case ShuffleMode::DOWN: | ||
| 31 | return ir.ShuffleDown(value, index, clamp, seg_mask); | ||
| 32 | case ShuffleMode::BFLY: | ||
| 33 | return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||
| 34 | default: | ||
| 35 | throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||
| 40 | union { | ||
| 41 | u64 insn; | ||
| 42 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 43 | BitField<8, 8, IR::Reg> src_reg; | ||
| 44 | BitField<30, 2, ShuffleMode> mode; | ||
| 45 | BitField<48, 3, IR::Pred> pred; | ||
| 46 | } const shfl{insn}; | ||
| 47 | |||
| 48 | const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||
| 49 | v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||
| 50 | v.X(shfl.dest_reg, result); | ||
| 51 | } | ||
| 52 | } // Anonymous namespace | ||
| 53 | |||
| 54 | void TranslatorVisitor::SHFL(u64 insn) { | ||
| 55 | union { | ||
| 56 | u64 insn; | ||
| 57 | BitField<20, 5, u64> src_a_imm; | ||
| 58 | BitField<28, 1, u64> src_a_flag; | ||
| 59 | BitField<29, 1, u64> src_b_flag; | ||
| 60 | BitField<34, 13, u64> src_b_imm; | ||
| 61 | } const flags{insn}; | ||
| 62 | const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||
| 63 | : GetReg20(insn)}; | ||
| 64 | const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||
| 65 | : GetReg39(insn)}; | ||
| 66 | Shuffle(*this, insn, src_a, src_b); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace Shader::Maxwell | ||
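The second SHFL operand packs two 5-bit fields that ShuffleOperation unpacks: the lane clamp in bits [0,5) and the segment mask in bits [8,5). A quick decode of 0x1f, the value typically encoded for full-warp shuffles:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t mask = 0x1Fu;                   // illustrative operand value
    const uint32_t clamp = mask & 0x1Fu;           // BitFieldExtract(mask, 0, 5)
    const uint32_t seg_mask = (mask >> 8) & 0x1Fu; // BitFieldExtract(mask, 8, 5)
    std::printf("clamp=%u seg_mask=%u\n", clamp, seg_mask); // clamp=31 seg_mask=0
}
```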
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/environment.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | template <auto method> | ||
| 15 | static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { | ||
| 16 | using MethodType = decltype(method); | ||
| 17 | if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { | ||
| 18 | (visitor.*method)(pc, insn); | ||
| 19 | } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { | ||
| 20 | (visitor.*method)(insn); | ||
| 21 | } else { | ||
| 22 | (visitor.*method)(); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { | ||
| 27 | if (location_begin == location_end) { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | TranslatorVisitor visitor{env, *block}; | ||
| 31 | for (Location pc = location_begin; pc != location_end; ++pc) { | ||
| 32 | const u64 insn{env.ReadInstruction(pc.Offset())}; | ||
| 33 | try { | ||
| 34 | const Opcode opcode{Decode(insn)}; | ||
| 35 | switch (opcode) { | ||
| 36 | #define INST(name, cute, mask) \ | ||
| 37 | case Opcode::name: \ | ||
| 38 | Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ | ||
| 39 | break; | ||
| 40 | #include "shader_recompiler/frontend/maxwell/maxwell.inc" | ||
| 41 | #undef INST | ||
| 42 | default: | ||
| 43 | throw LogicError("Invalid opcode {}", opcode); | ||
| 44 | } | ||
| 45 | } catch (Exception& exception) { | ||
| 46 | exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); | ||
| 47 | throw; | ||
| 48 | } | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace Shader::Maxwell | ||
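The Invoke template above selects the call form per handler signature at compile time, so one INST table serves handlers that take (pc, insn), (insn), or nothing. A self-contained sketch of the same trick with a stand-in visitor:

```cpp
#include <cstdint>
#include <cstdio>
#include <type_traits>

struct Visitor {
    void WithInsn(uint64_t insn) {
        std::printf("insn=%llu\n", static_cast<unsigned long long>(insn));
    }
    void NoArgs() { std::printf("no operands\n"); }
};

template <auto method>
void Invoke(Visitor& v, uint64_t insn) {
    if constexpr (std::is_invocable_r_v<void, decltype(method), Visitor&, uint64_t>) {
        (v.*method)(insn); // handler consumes the instruction word
    } else {
        (v.*method)(); // handler takes no operands; drop it
    }
}

int main() {
    Visitor v;
    Invoke<&Visitor::WithInsn>(v, 42);
    Invoke<&Visitor::NoArgs>(v, 42);
}
```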
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); | ||
| 13 | |||
| 14 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/settings.h" | ||
| 10 | #include "shader_recompiler/exception.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/post_order.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 16 | #include "shader_recompiler/host_translate_info.h" | ||
| 17 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 18 | |||
| 19 | namespace Shader::Maxwell { | ||
| 20 | namespace { | ||
| 21 | IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { | ||
| 22 | size_t num_syntax_blocks{}; | ||
| 23 | for (const auto& node : syntax_list) { | ||
| 24 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 25 | ++num_syntax_blocks; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | IR::BlockList blocks; | ||
| 29 | blocks.reserve(num_syntax_blocks); | ||
| 30 | for (const auto& node : syntax_list) { | ||
| 31 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 32 | blocks.push_back(node.data.block); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | return blocks; | ||
| 36 | } | ||
| 37 | |||
| 38 | void RemoveUnreachableBlocks(IR::Program& program) { | ||
| 39 | // Some blocks might be unreachable if a function call exists unconditionally. | ||
| 40 | // When this happens, the number of blocks and post-order blocks will mismatch. | ||
| 41 | if (program.blocks.size() == program.post_order_blocks.size()) { | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | const auto begin{program.blocks.begin() + 1}; | ||
| 45 | const auto end{program.blocks.end()}; | ||
| 46 | const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; | ||
| 47 | program.blocks.erase(std::remove_if(begin, end, pred), end); | ||
| 48 | } | ||
| 49 | |||
| 50 | void CollectInterpolationInfo(Environment& env, IR::Program& program) { | ||
| 51 | if (program.stage != Stage::Fragment) { | ||
| 52 | return; | ||
| 53 | } | ||
| 54 | const ProgramHeader& sph{env.SPH()}; | ||
| 55 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 56 | std::optional<PixelImap> imap; | ||
| 57 | for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { | ||
| 58 | if (value == PixelImap::Unused) { | ||
| 59 | continue; | ||
| 60 | } | ||
| 61 | if (imap && imap != value) { | ||
| 62 | throw NotImplementedException("Per component interpolation"); | ||
| 63 | } | ||
| 64 | imap = value; | ||
| 65 | } | ||
| 66 | if (!imap) { | ||
| 67 | continue; | ||
| 68 | } | ||
| 69 | program.info.interpolation[index] = [&] { | ||
| 70 | switch (*imap) { | ||
| 71 | case PixelImap::Unused: | ||
| 72 | case PixelImap::Perspective: | ||
| 73 | return Interpolation::Smooth; | ||
| 74 | case PixelImap::Constant: | ||
| 75 | return Interpolation::Flat; | ||
| 76 | case PixelImap::ScreenLinear: | ||
| 77 | return Interpolation::NoPerspective; | ||
| 78 | } | ||
| 79 | throw NotImplementedException("Unknown interpolation {}", *imap); | ||
| 80 | }(); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void AddNVNStorageBuffers(IR::Program& program) { | ||
| 85 | if (!program.info.uses_global_memory) { | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | const u32 driver_cbuf{0}; | ||
| 89 | const u32 descriptor_size{0x10}; | ||
| 90 | const u32 num_buffers{16}; | ||
| 91 | const u32 base{[&] { | ||
| 92 | switch (program.stage) { | ||
| 93 | case Stage::VertexA: | ||
| 94 | case Stage::VertexB: | ||
| 95 | return 0x110u; | ||
| 96 | case Stage::TessellationControl: | ||
| 97 | return 0x210u; | ||
| 98 | case Stage::TessellationEval: | ||
| 99 | return 0x310u; | ||
| 100 | case Stage::Geometry: | ||
| 101 | return 0x410u; | ||
| 102 | case Stage::Fragment: | ||
| 103 | return 0x510u; | ||
| 104 | case Stage::Compute: | ||
| 105 | return 0x310u; | ||
| 106 | } | ||
| 107 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 108 | }()}; | ||
| 109 | auto& descs{program.info.storage_buffers_descriptors}; | ||
| 110 | for (u32 index = 0; index < num_buffers; ++index) { | ||
| 111 | if (!program.info.nvn_buffer_used[index]) { | ||
| 112 | continue; | ||
| 113 | } | ||
| 114 | const u32 offset{base + index * descriptor_size}; | ||
| 115 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; | ||
| 116 | if (it != descs.end()) { | ||
| 117 | it->is_written |= program.info.stores_global_memory; | ||
| 118 | continue; | ||
| 119 | } | ||
| 120 | descs.push_back({ | ||
| 121 | .cbuf_index = driver_cbuf, | ||
| 122 | .cbuf_offset = offset, | ||
| 123 | .count = 1, | ||
| 124 | .is_written = program.info.stores_global_memory, | ||
| 125 | }); | ||
| 126 | } | ||
| 127 | } | ||
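The layout assumed here: the NVN driver reserves 16 storage buffer descriptors of 0x10 bytes each in constant buffer 0, starting at a per-stage base. A small sketch of the offset arithmetic, using the fragment-stage base of 0x510 from the switch above (illustrative only):

```cpp
#include <cstdint>

constexpr std::uint32_t NvnDescriptorOffset(std::uint32_t stage_base, std::uint32_t index) {
    constexpr std::uint32_t descriptor_size{0x10};
    return stage_base + index * descriptor_size;
}
// Fragment-stage descriptor start offsets range from 0x510 to 0x600.
static_assert(NvnDescriptorOffset(0x510, 0) == 0x510);
static_assert(NvnDescriptorOffset(0x510, 15) == 0x600);
```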
| 128 | } // Anonymous namespace | ||
| 129 | |||
| 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | ||
| 132 | IR::Program program; | ||
| 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | ||
| 134 | program.blocks = GenerateBlocks(program.syntax_list); | ||
| 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); | ||
| 136 | program.stage = env.ShaderStage(); | ||
| 137 | program.local_memory_size = env.LocalMemorySize(); | ||
| 138 | switch (program.stage) { | ||
| 139 | case Stage::TessellationControl: { | ||
| 140 | const ProgramHeader& sph{env.SPH()}; | ||
| 141 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 142 | break; | ||
| 143 | } | ||
| 144 | case Stage::Geometry: { | ||
| 145 | const ProgramHeader& sph{env.SPH()}; | ||
| 146 | program.output_topology = sph.common3.output_topology; | ||
| 147 | program.output_vertices = sph.common4.max_output_vertices; | ||
| 148 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 149 | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; | ||
| 150 | if (program.is_geometry_passthrough) { | ||
| 151 | const auto& mask{env.GpPassthroughMask()}; | ||
| 152 | for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { | ||
| 153 | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | case Stage::Compute: | ||
| 159 | program.workgroup_size = env.WorkgroupSize(); | ||
| 160 | program.shared_memory_size = env.SharedMemorySize(); | ||
| 161 | break; | ||
| 162 | default: | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | RemoveUnreachableBlocks(program); | ||
| 166 | |||
| 167 | // Replace instructions before the SSA rewrite | ||
| 168 | if (!host_info.support_float16) { | ||
| 169 | Optimization::LowerFp16ToFp32(program); | ||
| 170 | } | ||
| 171 | if (!host_info.support_int64) { | ||
| 172 | Optimization::LowerInt64ToInt32(program); | ||
| 173 | } | ||
| 174 | Optimization::SsaRewritePass(program); | ||
| 175 | |||
| 176 | Optimization::GlobalMemoryToStorageBufferPass(program); | ||
| 177 | Optimization::TexturePass(env, program); | ||
| 178 | |||
| 179 | Optimization::ConstantPropagationPass(program); | ||
| 180 | Optimization::DeadCodeEliminationPass(program); | ||
| 181 | if (Settings::values.renderer_debug) { | ||
| 182 | Optimization::VerificationPass(program); | ||
| 183 | } | ||
| 184 | Optimization::CollectShaderInfoPass(env, program); | ||
| 185 | CollectInterpolationInfo(env, program); | ||
| 186 | AddNVNStorageBuffers(program); | ||
| 187 | return program; | ||
| 188 | } | ||
| 189 | |||
| 190 | IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 191 | Environment& env_vertex_b) { | ||
| 192 | IR::Program result{}; | ||
| 193 | Optimization::VertexATransformPass(vertex_a); | ||
| 194 | Optimization::VertexBTransformPass(vertex_b); | ||
| 195 | for (const auto& term : vertex_a.syntax_list) { | ||
| 196 | if (term.type != IR::AbstractSyntaxNode::Type::Return) { | ||
| 197 | result.syntax_list.push_back(term); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), | ||
| 201 | vertex_b.syntax_list.end()); | ||
| 202 | result.blocks = GenerateBlocks(result.syntax_list); | ||
| 203 | result.post_order_blocks = vertex_b.post_order_blocks; | ||
| 204 | for (const auto& block : vertex_a.post_order_blocks) { | ||
| 205 | result.post_order_blocks.push_back(block); | ||
| 206 | } | ||
| 207 | result.stage = Stage::VertexB; | ||
| 208 | result.info = vertex_a.info; | ||
| 209 | result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); | ||
| 210 | result.info.loads.mask |= vertex_b.info.loads.mask; | ||
| 211 | result.info.stores.mask |= vertex_b.info.stores.mask; | ||
| 212 | |||
| 213 | Optimization::JoinTextureInfo(result.info, vertex_b.info); | ||
| 214 | Optimization::JoinStorageInfo(result.info, vertex_b.info); | ||
| 215 | Optimization::DeadCodeEliminationPass(result); | ||
| 216 | if (Settings::values.renderer_debug) { | ||
| 217 | Optimization::VerificationPass(result); | ||
| 218 | } | ||
| 219 | Optimization::CollectShaderInfoPass(env_vertex_b, result); | ||
| 220 | return result; | ||
| 221 | } | ||
| 222 | |||
| 223 | } // namespace Shader::Maxwell | ||
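For reference, a sketch of the call shape for `TranslateProgram` as declared in the header below. The `Environment` and `Flow::CFG` instances are assumed to be built elsewhere, and the object pools must outlive the returned program since they own the IR nodes; the wrapper function itself is hypothetical.

```cpp
#include "shader_recompiler/frontend/maxwell/translate_program.h"

Shader::IR::Program Translate(Shader::ObjectPool<Shader::IR::Inst>& inst_pool,
                              Shader::ObjectPool<Shader::IR::Block>& block_pool,
                              Shader::Environment& env, Shader::Maxwell::Flow::CFG& cfg) {
    const Shader::HostTranslateInfo host_info{
        .support_float16 = true, // in practice queried from the host device
        .support_int64 = true,
    };
    return Shader::Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg, host_info);
}
```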
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/host_translate_info.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg, const HostTranslateInfo& host_info); | ||
| 19 | |||
| 20 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 21 | Environment& env_vertex_b); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Shader { | ||
| 8 | |||
| 9 | // Try to keep entries here to a minimum | ||
| 10 | // New entries can accidentally change the information cached for a shader | ||
| 11 | |||
| 12 | /// Misc information about the host | ||
| 13 | struct HostTranslateInfo { | ||
| 14 | bool support_float16{}; ///< True when the device supports 16-bit floats | ||
| 15 | bool support_int64{}; ///< True when the device supports 64-bit integers | ||
| 16 | }; | ||
| 17 | |||
| 18 | } // namespace Shader | ||
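A hypothetical sketch of how a backend could populate this struct; the `Device` type and its accessors are illustrative placeholders, not actual yuzu API:

```cpp
// Hypothetical: Device and its accessors are placeholders for real device caps.
template <typename Device>
Shader::HostTranslateInfo MakeHostInfo(const Device& device) {
    return Shader::HostTranslateInfo{
        .support_float16 = device.SupportsFloat16(),
        .support_int64 = device.SupportsInt64(),
    };
}
```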
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp new file mode 100644 index 000000000..5ead930f1 --- /dev/null +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -0,0 +1,928 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/alignment.h" | ||
| 6 | #include "shader_recompiler/environment.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 11 | #include "shader_recompiler/shader_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | namespace { | ||
| 15 | void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | ||
| 16 | if (count != 1) { | ||
| 17 | throw NotImplementedException("Constant buffer descriptor indexing"); | ||
| 18 | } | ||
| 19 | if ((info.constant_buffer_mask & (1U << index)) != 0) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | info.constant_buffer_mask |= 1U << index; | ||
| 23 | |||
| 24 | auto& cbufs{info.constant_buffer_descriptors}; | ||
| 25 | cbufs.insert(std::ranges::lower_bound(cbufs, index, {}, &ConstantBufferDescriptor::index), | ||
| 26 | ConstantBufferDescriptor{ | ||
| 27 | .index = index, | ||
| 28 | .count = 1, | ||
| 29 | }); | ||
| 30 | } | ||
| 31 | |||
| 32 | void GetPatch(Info& info, IR::Patch patch) { | ||
| 33 | if (!IR::IsGeneric(patch)) { | ||
| 34 | throw NotImplementedException("Reading non-generic patch {}", patch); | ||
| 35 | } | ||
| 36 | info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; | ||
| 37 | } | ||
| 38 | |||
| 39 | void SetPatch(Info& info, IR::Patch patch) { | ||
| 40 | if (IR::IsGeneric(patch)) { | ||
| 41 | info.uses_patches.at(IR::GenericPatchIndex(patch)) = true; | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | switch (patch) { | ||
| 45 | case IR::Patch::TessellationLodLeft: | ||
| 46 | case IR::Patch::TessellationLodTop: | ||
| 47 | case IR::Patch::TessellationLodRight: | ||
| 48 | case IR::Patch::TessellationLodBottom: | ||
| 49 | info.stores_tess_level_outer = true; | ||
| 50 | break; | ||
| 51 | case IR::Patch::TessellationLodInteriorU: | ||
| 52 | case IR::Patch::TessellationLodInteriorV: | ||
| 53 | info.stores_tess_level_inner = true; | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Set patch {}", patch); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | void CheckCBufNVN(Info& info, IR::Inst& inst) { | ||
| 61 | const IR::Value cbuf_index{inst.Arg(0)}; | ||
| 62 | if (!cbuf_index.IsImmediate()) { | ||
| 63 | info.nvn_buffer_used.set(); | ||
| 64 | return; | ||
| 65 | } | ||
| 66 | const u32 index{cbuf_index.U32()}; | ||
| 67 | if (index != 0) { | ||
| 68 | return; | ||
| 69 | } | ||
| 70 | const IR::Value cbuf_offset{inst.Arg(1)}; | ||
| 71 | if (!cbuf_offset.IsImmediate()) { | ||
| 72 | info.nvn_buffer_used.set(); | ||
| 73 | return; | ||
| 74 | } | ||
| 75 | const u32 offset{cbuf_offset.U32()}; | ||
| 76 | const u32 descriptor_size{0x10}; | ||
| 77 | const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16}; | ||
| 78 | if (offset >= info.nvn_buffer_base && offset < upper_limit) { | ||
| 79 | const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size}; | ||
| 80 | info.nvn_buffer_used.set(nvn_index, true); | ||
| 81 | } | ||
| 82 | } | ||
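A worked example of the slot arithmetic above, using the vertex-stage `nvn_buffer_base` of 0x110 (the per-stage bases appear in `CollectShaderInfoPass` later in this file):

```cpp
constexpr unsigned base{0x110};           // vertex-stage nvn_buffer_base
constexpr unsigned descriptor_size{0x10};
constexpr unsigned upper_limit{base + descriptor_size * 16};
static_assert(upper_limit == 0x210);
// An immediate cbuf0 read at offset 0x130 marks NVN buffer slot 2 as used.
static_assert((0x130 - base) / descriptor_size == 2);
```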
| 83 | |||
| 84 | void VisitUsages(Info& info, IR::Inst& inst) { | ||
| 85 | switch (inst.GetOpcode()) { | ||
| 86 | case IR::Opcode::CompositeConstructF16x2: | ||
| 87 | case IR::Opcode::CompositeConstructF16x3: | ||
| 88 | case IR::Opcode::CompositeConstructF16x4: | ||
| 89 | case IR::Opcode::CompositeExtractF16x2: | ||
| 90 | case IR::Opcode::CompositeExtractF16x3: | ||
| 91 | case IR::Opcode::CompositeExtractF16x4: | ||
| 92 | case IR::Opcode::CompositeInsertF16x2: | ||
| 93 | case IR::Opcode::CompositeInsertF16x3: | ||
| 94 | case IR::Opcode::CompositeInsertF16x4: | ||
| 95 | case IR::Opcode::SelectF16: | ||
| 96 | case IR::Opcode::BitCastU16F16: | ||
| 97 | case IR::Opcode::BitCastF16U16: | ||
| 98 | case IR::Opcode::PackFloat2x16: | ||
| 99 | case IR::Opcode::UnpackFloat2x16: | ||
| 100 | case IR::Opcode::ConvertS16F16: | ||
| 101 | case IR::Opcode::ConvertS32F16: | ||
| 102 | case IR::Opcode::ConvertS64F16: | ||
| 103 | case IR::Opcode::ConvertU16F16: | ||
| 104 | case IR::Opcode::ConvertU32F16: | ||
| 105 | case IR::Opcode::ConvertU64F16: | ||
| 106 | case IR::Opcode::ConvertF16S8: | ||
| 107 | case IR::Opcode::ConvertF16S16: | ||
| 108 | case IR::Opcode::ConvertF16S32: | ||
| 109 | case IR::Opcode::ConvertF16S64: | ||
| 110 | case IR::Opcode::ConvertF16U8: | ||
| 111 | case IR::Opcode::ConvertF16U16: | ||
| 112 | case IR::Opcode::ConvertF16U32: | ||
| 113 | case IR::Opcode::ConvertF16U64: | ||
| 114 | case IR::Opcode::FPAbs16: | ||
| 115 | case IR::Opcode::FPAdd16: | ||
| 116 | case IR::Opcode::FPCeil16: | ||
| 117 | case IR::Opcode::FPFloor16: | ||
| 118 | case IR::Opcode::FPFma16: | ||
| 119 | case IR::Opcode::FPMul16: | ||
| 120 | case IR::Opcode::FPNeg16: | ||
| 121 | case IR::Opcode::FPRoundEven16: | ||
| 122 | case IR::Opcode::FPSaturate16: | ||
| 123 | case IR::Opcode::FPClamp16: | ||
| 124 | case IR::Opcode::FPTrunc16: | ||
| 125 | case IR::Opcode::FPOrdEqual16: | ||
| 126 | case IR::Opcode::FPUnordEqual16: | ||
| 127 | case IR::Opcode::FPOrdNotEqual16: | ||
| 128 | case IR::Opcode::FPUnordNotEqual16: | ||
| 129 | case IR::Opcode::FPOrdLessThan16: | ||
| 130 | case IR::Opcode::FPUnordLessThan16: | ||
| 131 | case IR::Opcode::FPOrdGreaterThan16: | ||
| 132 | case IR::Opcode::FPUnordGreaterThan16: | ||
| 133 | case IR::Opcode::FPOrdLessThanEqual16: | ||
| 134 | case IR::Opcode::FPUnordLessThanEqual16: | ||
| 135 | case IR::Opcode::FPOrdGreaterThanEqual16: | ||
| 136 | case IR::Opcode::FPUnordGreaterThanEqual16: | ||
| 137 | case IR::Opcode::FPIsNan16: | ||
| 138 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 139 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 140 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 141 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 142 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 143 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 144 | info.uses_fp16 = true; | ||
| 145 | break; | ||
| 146 | case IR::Opcode::CompositeConstructF64x2: | ||
| 147 | case IR::Opcode::CompositeConstructF64x3: | ||
| 148 | case IR::Opcode::CompositeConstructF64x4: | ||
| 149 | case IR::Opcode::CompositeExtractF64x2: | ||
| 150 | case IR::Opcode::CompositeExtractF64x3: | ||
| 151 | case IR::Opcode::CompositeExtractF64x4: | ||
| 152 | case IR::Opcode::CompositeInsertF64x2: | ||
| 153 | case IR::Opcode::CompositeInsertF64x3: | ||
| 154 | case IR::Opcode::CompositeInsertF64x4: | ||
| 155 | case IR::Opcode::SelectF64: | ||
| 156 | case IR::Opcode::BitCastU64F64: | ||
| 157 | case IR::Opcode::BitCastF64U64: | ||
| 158 | case IR::Opcode::PackDouble2x32: | ||
| 159 | case IR::Opcode::UnpackDouble2x32: | ||
| 160 | case IR::Opcode::FPAbs64: | ||
| 161 | case IR::Opcode::FPAdd64: | ||
| 162 | case IR::Opcode::FPCeil64: | ||
| 163 | case IR::Opcode::FPFloor64: | ||
| 164 | case IR::Opcode::FPFma64: | ||
| 165 | case IR::Opcode::FPMax64: | ||
| 166 | case IR::Opcode::FPMin64: | ||
| 167 | case IR::Opcode::FPMul64: | ||
| 168 | case IR::Opcode::FPNeg64: | ||
| 169 | case IR::Opcode::FPRecip64: | ||
| 170 | case IR::Opcode::FPRecipSqrt64: | ||
| 171 | case IR::Opcode::FPRoundEven64: | ||
| 172 | case IR::Opcode::FPSaturate64: | ||
| 173 | case IR::Opcode::FPClamp64: | ||
| 174 | case IR::Opcode::FPTrunc64: | ||
| 175 | case IR::Opcode::FPOrdEqual64: | ||
| 176 | case IR::Opcode::FPUnordEqual64: | ||
| 177 | case IR::Opcode::FPOrdNotEqual64: | ||
| 178 | case IR::Opcode::FPUnordNotEqual64: | ||
| 179 | case IR::Opcode::FPOrdLessThan64: | ||
| 180 | case IR::Opcode::FPUnordLessThan64: | ||
| 181 | case IR::Opcode::FPOrdGreaterThan64: | ||
| 182 | case IR::Opcode::FPUnordGreaterThan64: | ||
| 183 | case IR::Opcode::FPOrdLessThanEqual64: | ||
| 184 | case IR::Opcode::FPUnordLessThanEqual64: | ||
| 185 | case IR::Opcode::FPOrdGreaterThanEqual64: | ||
| 186 | case IR::Opcode::FPUnordGreaterThanEqual64: | ||
| 187 | case IR::Opcode::FPIsNan64: | ||
| 188 | case IR::Opcode::ConvertS16F64: | ||
| 189 | case IR::Opcode::ConvertS32F64: | ||
| 190 | case IR::Opcode::ConvertS64F64: | ||
| 191 | case IR::Opcode::ConvertU16F64: | ||
| 192 | case IR::Opcode::ConvertU32F64: | ||
| 193 | case IR::Opcode::ConvertU64F64: | ||
| 194 | case IR::Opcode::ConvertF32F64: | ||
| 195 | case IR::Opcode::ConvertF64F32: | ||
| 196 | case IR::Opcode::ConvertF64S8: | ||
| 197 | case IR::Opcode::ConvertF64S16: | ||
| 198 | case IR::Opcode::ConvertF64S32: | ||
| 199 | case IR::Opcode::ConvertF64S64: | ||
| 200 | case IR::Opcode::ConvertF64U8: | ||
| 201 | case IR::Opcode::ConvertF64U16: | ||
| 202 | case IR::Opcode::ConvertF64U32: | ||
| 203 | case IR::Opcode::ConvertF64U64: | ||
| 204 | info.uses_fp64 = true; | ||
| 205 | break; | ||
| 206 | default: | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | switch (inst.GetOpcode()) { | ||
| 210 | case IR::Opcode::GetCbufU8: | ||
| 211 | case IR::Opcode::GetCbufS8: | ||
| 212 | case IR::Opcode::UndefU8: | ||
| 213 | case IR::Opcode::LoadGlobalU8: | ||
| 214 | case IR::Opcode::LoadGlobalS8: | ||
| 215 | case IR::Opcode::WriteGlobalU8: | ||
| 216 | case IR::Opcode::WriteGlobalS8: | ||
| 217 | case IR::Opcode::LoadStorageU8: | ||
| 218 | case IR::Opcode::LoadStorageS8: | ||
| 219 | case IR::Opcode::WriteStorageU8: | ||
| 220 | case IR::Opcode::WriteStorageS8: | ||
| 221 | case IR::Opcode::LoadSharedU8: | ||
| 222 | case IR::Opcode::LoadSharedS8: | ||
| 223 | case IR::Opcode::WriteSharedU8: | ||
| 224 | case IR::Opcode::SelectU8: | ||
| 225 | case IR::Opcode::ConvertF16S8: | ||
| 226 | case IR::Opcode::ConvertF16U8: | ||
| 227 | case IR::Opcode::ConvertF32S8: | ||
| 228 | case IR::Opcode::ConvertF32U8: | ||
| 229 | case IR::Opcode::ConvertF64S8: | ||
| 230 | case IR::Opcode::ConvertF64U8: | ||
| 231 | info.uses_int8 = true; | ||
| 232 | break; | ||
| 233 | default: | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | switch (inst.GetOpcode()) { | ||
| 237 | case IR::Opcode::GetCbufU16: | ||
| 238 | case IR::Opcode::GetCbufS16: | ||
| 239 | case IR::Opcode::UndefU16: | ||
| 240 | case IR::Opcode::LoadGlobalU16: | ||
| 241 | case IR::Opcode::LoadGlobalS16: | ||
| 242 | case IR::Opcode::WriteGlobalU16: | ||
| 243 | case IR::Opcode::WriteGlobalS16: | ||
| 244 | case IR::Opcode::LoadStorageU16: | ||
| 245 | case IR::Opcode::LoadStorageS16: | ||
| 246 | case IR::Opcode::WriteStorageU16: | ||
| 247 | case IR::Opcode::WriteStorageS16: | ||
| 248 | case IR::Opcode::LoadSharedU16: | ||
| 249 | case IR::Opcode::LoadSharedS16: | ||
| 250 | case IR::Opcode::WriteSharedU16: | ||
| 251 | case IR::Opcode::SelectU16: | ||
| 252 | case IR::Opcode::BitCastU16F16: | ||
| 253 | case IR::Opcode::BitCastF16U16: | ||
| 254 | case IR::Opcode::ConvertS16F16: | ||
| 255 | case IR::Opcode::ConvertS16F32: | ||
| 256 | case IR::Opcode::ConvertS16F64: | ||
| 257 | case IR::Opcode::ConvertU16F16: | ||
| 258 | case IR::Opcode::ConvertU16F32: | ||
| 259 | case IR::Opcode::ConvertU16F64: | ||
| 260 | case IR::Opcode::ConvertF16S16: | ||
| 261 | case IR::Opcode::ConvertF16U16: | ||
| 262 | case IR::Opcode::ConvertF32S16: | ||
| 263 | case IR::Opcode::ConvertF32U16: | ||
| 264 | case IR::Opcode::ConvertF64S16: | ||
| 265 | case IR::Opcode::ConvertF64U16: | ||
| 266 | info.uses_int16 = true; | ||
| 267 | break; | ||
| 268 | default: | ||
| 269 | break; | ||
| 270 | } | ||
| 271 | switch (inst.GetOpcode()) { | ||
| 272 | case IR::Opcode::UndefU64: | ||
| 273 | case IR::Opcode::LoadGlobalU8: | ||
| 274 | case IR::Opcode::LoadGlobalS8: | ||
| 275 | case IR::Opcode::LoadGlobalU16: | ||
| 276 | case IR::Opcode::LoadGlobalS16: | ||
| 277 | case IR::Opcode::LoadGlobal32: | ||
| 278 | case IR::Opcode::LoadGlobal64: | ||
| 279 | case IR::Opcode::LoadGlobal128: | ||
| 280 | case IR::Opcode::WriteGlobalU8: | ||
| 281 | case IR::Opcode::WriteGlobalS8: | ||
| 282 | case IR::Opcode::WriteGlobalU16: | ||
| 283 | case IR::Opcode::WriteGlobalS16: | ||
| 284 | case IR::Opcode::WriteGlobal32: | ||
| 285 | case IR::Opcode::WriteGlobal64: | ||
| 286 | case IR::Opcode::WriteGlobal128: | ||
| 287 | case IR::Opcode::SelectU64: | ||
| 288 | case IR::Opcode::BitCastU64F64: | ||
| 289 | case IR::Opcode::BitCastF64U64: | ||
| 290 | case IR::Opcode::PackUint2x32: | ||
| 291 | case IR::Opcode::UnpackUint2x32: | ||
| 292 | case IR::Opcode::IAdd64: | ||
| 293 | case IR::Opcode::ISub64: | ||
| 294 | case IR::Opcode::INeg64: | ||
| 295 | case IR::Opcode::ShiftLeftLogical64: | ||
| 296 | case IR::Opcode::ShiftRightLogical64: | ||
| 297 | case IR::Opcode::ShiftRightArithmetic64: | ||
| 298 | case IR::Opcode::ConvertS64F16: | ||
| 299 | case IR::Opcode::ConvertS64F32: | ||
| 300 | case IR::Opcode::ConvertS64F64: | ||
| 301 | case IR::Opcode::ConvertU64F16: | ||
| 302 | case IR::Opcode::ConvertU64F32: | ||
| 303 | case IR::Opcode::ConvertU64F64: | ||
| 304 | case IR::Opcode::ConvertU64U32: | ||
| 305 | case IR::Opcode::ConvertU32U64: | ||
| 306 | case IR::Opcode::ConvertF16U64: | ||
| 307 | case IR::Opcode::ConvertF32U64: | ||
| 308 | case IR::Opcode::ConvertF64U64: | ||
| 309 | case IR::Opcode::SharedAtomicExchange64: | ||
| 310 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 311 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 312 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 313 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 314 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 315 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 316 | case IR::Opcode::GlobalAtomicOr64: | ||
| 317 | case IR::Opcode::GlobalAtomicXor64: | ||
| 318 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 319 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 320 | case IR::Opcode::StorageAtomicSMin64: | ||
| 321 | case IR::Opcode::StorageAtomicUMin64: | ||
| 322 | case IR::Opcode::StorageAtomicSMax64: | ||
| 323 | case IR::Opcode::StorageAtomicUMax64: | ||
| 324 | case IR::Opcode::StorageAtomicAnd64: | ||
| 325 | case IR::Opcode::StorageAtomicOr64: | ||
| 326 | case IR::Opcode::StorageAtomicXor64: | ||
| 327 | case IR::Opcode::StorageAtomicExchange64: | ||
| 328 | info.uses_int64 = true; | ||
| 329 | break; | ||
| 330 | default: | ||
| 331 | break; | ||
| 332 | } | ||
| 333 | switch (inst.GetOpcode()) { | ||
| 334 | case IR::Opcode::WriteGlobalU8: | ||
| 335 | case IR::Opcode::WriteGlobalS8: | ||
| 336 | case IR::Opcode::WriteGlobalU16: | ||
| 337 | case IR::Opcode::WriteGlobalS16: | ||
| 338 | case IR::Opcode::WriteGlobal32: | ||
| 339 | case IR::Opcode::WriteGlobal64: | ||
| 340 | case IR::Opcode::WriteGlobal128: | ||
| 341 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 342 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 343 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 344 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 345 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 346 | case IR::Opcode::GlobalAtomicInc32: | ||
| 347 | case IR::Opcode::GlobalAtomicDec32: | ||
| 348 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 349 | case IR::Opcode::GlobalAtomicOr32: | ||
| 350 | case IR::Opcode::GlobalAtomicXor32: | ||
| 351 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 352 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 353 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 354 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 355 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 356 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 357 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 358 | case IR::Opcode::GlobalAtomicOr64: | ||
| 359 | case IR::Opcode::GlobalAtomicXor64: | ||
| 360 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 361 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 362 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 363 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 364 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 365 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 366 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 367 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 368 | info.stores_global_memory = true; | ||
| 369 | [[fallthrough]]; | ||
| 370 | case IR::Opcode::LoadGlobalU8: | ||
| 371 | case IR::Opcode::LoadGlobalS8: | ||
| 372 | case IR::Opcode::LoadGlobalU16: | ||
| 373 | case IR::Opcode::LoadGlobalS16: | ||
| 374 | case IR::Opcode::LoadGlobal32: | ||
| 375 | case IR::Opcode::LoadGlobal64: | ||
| 376 | case IR::Opcode::LoadGlobal128: | ||
| 377 | info.uses_int64 = true; | ||
| 378 | info.uses_global_memory = true; | ||
| 379 | info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; | ||
| 380 | info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; | ||
| 381 | break; | ||
| 382 | default: | ||
| 383 | break; | ||
| 384 | } | ||
| 385 | switch (inst.GetOpcode()) { | ||
| 386 | case IR::Opcode::DemoteToHelperInvocation: | ||
| 387 | info.uses_demote_to_helper_invocation = true; | ||
| 388 | break; | ||
| 389 | case IR::Opcode::GetAttribute: | ||
| 390 | info.loads.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; | ||
| 391 | break; | ||
| 392 | case IR::Opcode::SetAttribute: | ||
| 393 | info.stores.mask[static_cast<size_t>(inst.Arg(0).Attribute())] = true; | ||
| 394 | break; | ||
| 395 | case IR::Opcode::GetPatch: | ||
| 396 | GetPatch(info, inst.Arg(0).Patch()); | ||
| 397 | break; | ||
| 398 | case IR::Opcode::SetPatch: | ||
| 399 | SetPatch(info, inst.Arg(0).Patch()); | ||
| 400 | break; | ||
| 401 | case IR::Opcode::GetAttributeIndexed: | ||
| 402 | info.loads_indexed_attributes = true; | ||
| 403 | break; | ||
| 404 | case IR::Opcode::SetAttributeIndexed: | ||
| 405 | info.stores_indexed_attributes = true; | ||
| 406 | break; | ||
| 407 | case IR::Opcode::SetFragColor: | ||
| 408 | info.stores_frag_color[inst.Arg(0).U32()] = true; | ||
| 409 | break; | ||
| 410 | case IR::Opcode::SetSampleMask: | ||
| 411 | info.stores_sample_mask = true; | ||
| 412 | break; | ||
| 413 | case IR::Opcode::SetFragDepth: | ||
| 414 | info.stores_frag_depth = true; | ||
| 415 | break; | ||
| 416 | case IR::Opcode::WorkgroupId: | ||
| 417 | info.uses_workgroup_id = true; | ||
| 418 | break; | ||
| 419 | case IR::Opcode::LocalInvocationId: | ||
| 420 | info.uses_local_invocation_id = true; | ||
| 421 | break; | ||
| 422 | case IR::Opcode::InvocationId: | ||
| 423 | info.uses_invocation_id = true; | ||
| 424 | break; | ||
| 425 | case IR::Opcode::SampleId: | ||
| 426 | info.uses_sample_id = true; | ||
| 427 | break; | ||
| 428 | case IR::Opcode::IsHelperInvocation: | ||
| 429 | info.uses_is_helper_invocation = true; | ||
| 430 | break; | ||
| 431 | case IR::Opcode::LaneId: | ||
| 432 | info.uses_subgroup_invocation_id = true; | ||
| 433 | break; | ||
| 434 | case IR::Opcode::ShuffleIndex: | ||
| 435 | case IR::Opcode::ShuffleUp: | ||
| 436 | case IR::Opcode::ShuffleDown: | ||
| 437 | case IR::Opcode::ShuffleButterfly: | ||
| 438 | info.uses_subgroup_shuffles = true; | ||
| 439 | break; | ||
| 440 | case IR::Opcode::GetCbufU8: | ||
| 441 | case IR::Opcode::GetCbufS8: | ||
| 442 | case IR::Opcode::GetCbufU16: | ||
| 443 | case IR::Opcode::GetCbufS16: | ||
| 444 | case IR::Opcode::GetCbufU32: | ||
| 445 | case IR::Opcode::GetCbufF32: | ||
| 446 | case IR::Opcode::GetCbufU32x2: { | ||
| 447 | const IR::Value index{inst.Arg(0)}; | ||
| 448 | const IR::Value offset{inst.Arg(1)}; | ||
| 449 | if (!index.IsImmediate()) { | ||
| 450 | throw NotImplementedException("Constant buffer with non-immediate index"); | ||
| 451 | } | ||
| 452 | AddConstantBufferDescriptor(info, index.U32(), 1); | ||
| 453 | u32 element_size{}; | ||
| 454 | switch (inst.GetOpcode()) { | ||
| 455 | case IR::Opcode::GetCbufU8: | ||
| 456 | case IR::Opcode::GetCbufS8: | ||
| 457 | info.used_constant_buffer_types |= IR::Type::U8; | ||
| 458 | element_size = 1; | ||
| 459 | break; | ||
| 460 | case IR::Opcode::GetCbufU16: | ||
| 461 | case IR::Opcode::GetCbufS16: | ||
| 462 | info.used_constant_buffer_types |= IR::Type::U16; | ||
| 463 | element_size = 2; | ||
| 464 | break; | ||
| 465 | case IR::Opcode::GetCbufU32: | ||
| 466 | info.used_constant_buffer_types |= IR::Type::U32; | ||
| 467 | element_size = 4; | ||
| 468 | break; | ||
| 469 | case IR::Opcode::GetCbufF32: | ||
| 470 | info.used_constant_buffer_types |= IR::Type::F32; | ||
| 471 | element_size = 4; | ||
| 472 | break; | ||
| 473 | case IR::Opcode::GetCbufU32x2: | ||
| 474 | info.used_constant_buffer_types |= IR::Type::U32x2; | ||
| 475 | element_size = 8; | ||
| 476 | break; | ||
| 477 | default: | ||
| 478 | break; | ||
| 479 | } | ||
| 480 | u32& size{info.constant_buffer_used_sizes[index.U32()]}; | ||
| 481 | if (offset.IsImmediate()) { | ||
| 482 | size = Common::AlignUp(std::max(size, offset.U32() + element_size), 16u); | ||
| 483 | } else { | ||
| 484 | size = 0x10'000; | ||
| 485 | } | ||
| 486 | break; | ||
| 487 | } | ||
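The used-size tracking above rounds up to 16-byte granularity. A worked example, assuming an immediate 4-byte (`GetCbufU32`) read at offset 0x24 and a smaller previous size:

```cpp
constexpr unsigned AlignUp16(unsigned value) {
    return (value + 15u) & ~15u;
}
// max(size, 0x24 + 4) = 0x28, aligned up to 0x30
static_assert(AlignUp16(0x24 + 4) == 0x30);
```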
| 488 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 489 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 490 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 491 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 492 | case IR::Opcode::BindlessImageGather: | ||
| 493 | case IR::Opcode::BindlessImageGatherDref: | ||
| 494 | case IR::Opcode::BindlessImageFetch: | ||
| 495 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 496 | case IR::Opcode::BindlessImageQueryLod: | ||
| 497 | case IR::Opcode::BindlessImageGradient: | ||
| 498 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 499 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 500 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 501 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 502 | case IR::Opcode::BoundImageGather: | ||
| 503 | case IR::Opcode::BoundImageGatherDref: | ||
| 504 | case IR::Opcode::BoundImageFetch: | ||
| 505 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 506 | case IR::Opcode::BoundImageQueryLod: | ||
| 507 | case IR::Opcode::BoundImageGradient: | ||
| 508 | case IR::Opcode::ImageGather: | ||
| 509 | case IR::Opcode::ImageGatherDref: | ||
| 510 | case IR::Opcode::ImageFetch: | ||
| 511 | case IR::Opcode::ImageQueryDimensions: | ||
| 512 | case IR::Opcode::ImageGradient: { | ||
| 513 | const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | ||
| 514 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; | ||
| 515 | info.uses_sparse_residency |= | ||
| 516 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 517 | break; | ||
| 518 | } | ||
| 519 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 520 | case IR::Opcode::ImageSampleExplicitLod: | ||
| 521 | case IR::Opcode::ImageSampleDrefImplicitLod: | ||
| 522 | case IR::Opcode::ImageSampleDrefExplicitLod: | ||
| 523 | case IR::Opcode::ImageQueryLod: { | ||
| 524 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 525 | const TextureType type{flags.type}; | ||
| 526 | info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D; | ||
| 527 | info.uses_shadow_lod |= flags.is_depth != 0; | ||
| 528 | info.uses_sparse_residency |= | ||
| 529 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 530 | break; | ||
| 531 | } | ||
| 532 | case IR::Opcode::ImageRead: { | ||
| 533 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 534 | info.uses_typeless_image_reads |= flags.image_format == ImageFormat::Typeless; | ||
| 535 | info.uses_sparse_residency |= | ||
| 536 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr; | ||
| 537 | break; | ||
| 538 | } | ||
| 539 | case IR::Opcode::ImageWrite: { | ||
| 540 | const auto flags{inst.Flags<IR::TextureInstInfo>()}; | ||
| 541 | info.uses_typeless_image_writes |= flags.image_format == ImageFormat::Typeless; | ||
| 542 | info.uses_image_buffers |= flags.type == TextureType::Buffer; | ||
| 543 | break; | ||
| 544 | } | ||
| 545 | case IR::Opcode::SubgroupEqMask: | ||
| 546 | case IR::Opcode::SubgroupLtMask: | ||
| 547 | case IR::Opcode::SubgroupLeMask: | ||
| 548 | case IR::Opcode::SubgroupGtMask: | ||
| 549 | case IR::Opcode::SubgroupGeMask: | ||
| 550 | info.uses_subgroup_mask = true; | ||
| 551 | break; | ||
| 552 | case IR::Opcode::VoteAll: | ||
| 553 | case IR::Opcode::VoteAny: | ||
| 554 | case IR::Opcode::VoteEqual: | ||
| 555 | case IR::Opcode::SubgroupBallot: | ||
| 556 | info.uses_subgroup_vote = true; | ||
| 557 | break; | ||
| 558 | case IR::Opcode::FSwizzleAdd: | ||
| 559 | info.uses_fswzadd = true; | ||
| 560 | break; | ||
| 561 | case IR::Opcode::DPdxFine: | ||
| 562 | case IR::Opcode::DPdyFine: | ||
| 563 | case IR::Opcode::DPdxCoarse: | ||
| 564 | case IR::Opcode::DPdyCoarse: | ||
| 565 | info.uses_derivatives = true; | ||
| 566 | break; | ||
| 567 | case IR::Opcode::LoadStorageU8: | ||
| 568 | case IR::Opcode::LoadStorageS8: | ||
| 569 | case IR::Opcode::WriteStorageU8: | ||
| 570 | case IR::Opcode::WriteStorageS8: | ||
| 571 | info.used_storage_buffer_types |= IR::Type::U8; | ||
| 572 | break; | ||
| 573 | case IR::Opcode::LoadStorageU16: | ||
| 574 | case IR::Opcode::LoadStorageS16: | ||
| 575 | case IR::Opcode::WriteStorageU16: | ||
| 576 | case IR::Opcode::WriteStorageS16: | ||
| 577 | info.used_storage_buffer_types |= IR::Type::U16; | ||
| 578 | break; | ||
| 579 | case IR::Opcode::LoadStorage32: | ||
| 580 | case IR::Opcode::WriteStorage32: | ||
| 581 | case IR::Opcode::StorageAtomicIAdd32: | ||
| 582 | case IR::Opcode::StorageAtomicUMin32: | ||
| 583 | case IR::Opcode::StorageAtomicUMax32: | ||
| 584 | case IR::Opcode::StorageAtomicAnd32: | ||
| 585 | case IR::Opcode::StorageAtomicOr32: | ||
| 586 | case IR::Opcode::StorageAtomicXor32: | ||
| 587 | case IR::Opcode::StorageAtomicExchange32: | ||
| 588 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 589 | break; | ||
| 590 | case IR::Opcode::LoadStorage64: | ||
| 591 | case IR::Opcode::WriteStorage64: | ||
| 592 | info.used_storage_buffer_types |= IR::Type::U32x2; | ||
| 593 | break; | ||
| 594 | case IR::Opcode::LoadStorage128: | ||
| 595 | case IR::Opcode::WriteStorage128: | ||
| 596 | info.used_storage_buffer_types |= IR::Type::U32x4; | ||
| 597 | break; | ||
| 598 | case IR::Opcode::SharedAtomicSMin32: | ||
| 599 | info.uses_atomic_s32_min = true; | ||
| 600 | break; | ||
| 601 | case IR::Opcode::SharedAtomicSMax32: | ||
| 602 | info.uses_atomic_s32_max = true; | ||
| 603 | break; | ||
| 604 | case IR::Opcode::SharedAtomicInc32: | ||
| 605 | info.uses_shared_increment = true; | ||
| 606 | break; | ||
| 607 | case IR::Opcode::SharedAtomicDec32: | ||
| 608 | info.uses_shared_decrement = true; | ||
| 609 | break; | ||
| 610 | case IR::Opcode::SharedAtomicExchange64: | ||
| 611 | info.uses_int64_bit_atomics = true; | ||
| 612 | break; | ||
| 613 | case IR::Opcode::GlobalAtomicInc32: | ||
| 614 | case IR::Opcode::StorageAtomicInc32: | ||
| 615 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 616 | info.uses_global_increment = true; | ||
| 617 | break; | ||
| 618 | case IR::Opcode::GlobalAtomicDec32: | ||
| 619 | case IR::Opcode::StorageAtomicDec32: | ||
| 620 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 621 | info.uses_global_decrement = true; | ||
| 622 | break; | ||
| 623 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 624 | case IR::Opcode::StorageAtomicAddF32: | ||
| 625 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 626 | info.uses_atomic_f32_add = true; | ||
| 627 | break; | ||
| 628 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 629 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 630 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 631 | info.uses_atomic_f16x2_add = true; | ||
| 632 | break; | ||
| 633 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 634 | case IR::Opcode::StorageAtomicAddF32x2: | ||
| 635 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 636 | info.uses_atomic_f32x2_add = true; | ||
| 637 | break; | ||
| 638 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 639 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 640 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 641 | info.uses_atomic_f16x2_min = true; | ||
| 642 | break; | ||
| 643 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 644 | case IR::Opcode::StorageAtomicMinF32x2: | ||
| 645 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 646 | info.uses_atomic_f32x2_min = true; | ||
| 647 | break; | ||
| 648 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 649 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 650 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 651 | info.uses_atomic_f16x2_max = true; | ||
| 652 | break; | ||
| 653 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 654 | case IR::Opcode::StorageAtomicMaxF32x2: | ||
| 655 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 656 | info.uses_atomic_f32x2_max = true; | ||
| 657 | break; | ||
| 658 | case IR::Opcode::StorageAtomicSMin32: | ||
| 659 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 660 | info.uses_atomic_s32_min = true; | ||
| 661 | break; | ||
| 662 | case IR::Opcode::StorageAtomicSMax32: | ||
| 663 | info.used_storage_buffer_types |= IR::Type::U32; | ||
| 664 | info.uses_atomic_s32_max = true; | ||
| 665 | break; | ||
| 666 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 667 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 668 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 669 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 670 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 671 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 672 | case IR::Opcode::GlobalAtomicOr64: | ||
| 673 | case IR::Opcode::GlobalAtomicXor64: | ||
| 674 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 675 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 676 | case IR::Opcode::StorageAtomicSMin64: | ||
| 677 | case IR::Opcode::StorageAtomicUMin64: | ||
| 678 | case IR::Opcode::StorageAtomicSMax64: | ||
| 679 | case IR::Opcode::StorageAtomicUMax64: | ||
| 680 | case IR::Opcode::StorageAtomicAnd64: | ||
| 681 | case IR::Opcode::StorageAtomicOr64: | ||
| 682 | case IR::Opcode::StorageAtomicXor64: | ||
| 683 | info.used_storage_buffer_types |= IR::Type::U64; | ||
| 684 | info.uses_int64_bit_atomics = true; | ||
| 685 | break; | ||
| 686 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 687 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 688 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 689 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 690 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 691 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 692 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 693 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 694 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 695 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 696 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 697 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 698 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 699 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 700 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 701 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 702 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 703 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 704 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 705 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 706 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 707 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 708 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 709 | case IR::Opcode::ImageAtomicSMin32: | ||
| 710 | case IR::Opcode::ImageAtomicUMin32: | ||
| 711 | case IR::Opcode::ImageAtomicSMax32: | ||
| 712 | case IR::Opcode::ImageAtomicUMax32: | ||
| 713 | case IR::Opcode::ImageAtomicInc32: | ||
| 714 | case IR::Opcode::ImageAtomicDec32: | ||
| 715 | case IR::Opcode::ImageAtomicAnd32: | ||
| 716 | case IR::Opcode::ImageAtomicOr32: | ||
| 717 | case IR::Opcode::ImageAtomicXor32: | ||
| 718 | case IR::Opcode::ImageAtomicExchange32: | ||
| 719 | info.uses_atomic_image_u32 = true; | ||
| 720 | break; | ||
| 721 | default: | ||
| 722 | break; | ||
| 723 | } | ||
| 724 | } | ||
| 725 | |||
| 726 | void VisitFpModifiers(Info& info, IR::Inst& inst) { | ||
| 727 | switch (inst.GetOpcode()) { | ||
| 728 | case IR::Opcode::FPAdd16: | ||
| 729 | case IR::Opcode::FPFma16: | ||
| 730 | case IR::Opcode::FPMul16: | ||
| 731 | case IR::Opcode::FPRoundEven16: | ||
| 732 | case IR::Opcode::FPFloor16: | ||
| 733 | case IR::Opcode::FPCeil16: | ||
| 734 | case IR::Opcode::FPTrunc16: { | ||
| 735 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 736 | switch (control.fmz_mode) { | ||
| 737 | case IR::FmzMode::DontCare: | ||
| 738 | break; | ||
| 739 | case IR::FmzMode::FTZ: | ||
| 740 | case IR::FmzMode::FMZ: | ||
| 741 | info.uses_fp16_denorms_flush = true; | ||
| 742 | break; | ||
| 743 | case IR::FmzMode::None: | ||
| 744 | info.uses_fp16_denorms_preserve = true; | ||
| 745 | break; | ||
| 746 | } | ||
| 747 | break; | ||
| 748 | } | ||
| 749 | case IR::Opcode::FPAdd32: | ||
| 750 | case IR::Opcode::FPFma32: | ||
| 751 | case IR::Opcode::FPMul32: | ||
| 752 | case IR::Opcode::FPRoundEven32: | ||
| 753 | case IR::Opcode::FPFloor32: | ||
| 754 | case IR::Opcode::FPCeil32: | ||
| 755 | case IR::Opcode::FPTrunc32: | ||
| 756 | case IR::Opcode::FPOrdEqual32: | ||
| 757 | case IR::Opcode::FPUnordEqual32: | ||
| 758 | case IR::Opcode::FPOrdNotEqual32: | ||
| 759 | case IR::Opcode::FPUnordNotEqual32: | ||
| 760 | case IR::Opcode::FPOrdLessThan32: | ||
| 761 | case IR::Opcode::FPUnordLessThan32: | ||
| 762 | case IR::Opcode::FPOrdGreaterThan32: | ||
| 763 | case IR::Opcode::FPUnordGreaterThan32: | ||
| 764 | case IR::Opcode::FPOrdLessThanEqual32: | ||
| 765 | case IR::Opcode::FPUnordLessThanEqual32: | ||
| 766 | case IR::Opcode::FPOrdGreaterThanEqual32: | ||
| 767 | case IR::Opcode::FPUnordGreaterThanEqual32: | ||
| 768 | case IR::Opcode::ConvertF16F32: | ||
| 769 | case IR::Opcode::ConvertF64F32: { | ||
| 770 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 771 | switch (control.fmz_mode) { | ||
| 772 | case IR::FmzMode::DontCare: | ||
| 773 | break; | ||
| 774 | case IR::FmzMode::FTZ: | ||
| 775 | case IR::FmzMode::FMZ: | ||
| 776 | info.uses_fp32_denorms_flush = true; | ||
| 777 | break; | ||
| 778 | case IR::FmzMode::None: | ||
| 779 | info.uses_fp32_denorms_preserve = true; | ||
| 780 | break; | ||
| 781 | } | ||
| 782 | break; | ||
| 783 | } | ||
| 784 | default: | ||
| 785 | break; | ||
| 786 | } | ||
| 787 | } | ||
| 788 | |||
| 789 | void VisitCbufs(Info& info, IR::Inst& inst) { | ||
| 790 | switch (inst.GetOpcode()) { | ||
| 791 | case IR::Opcode::GetCbufU8: | ||
| 792 | case IR::Opcode::GetCbufS8: | ||
| 793 | case IR::Opcode::GetCbufU16: | ||
| 794 | case IR::Opcode::GetCbufS16: | ||
| 795 | case IR::Opcode::GetCbufU32: | ||
| 796 | case IR::Opcode::GetCbufF32: | ||
| 797 | case IR::Opcode::GetCbufU32x2: { | ||
| 798 | CheckCBufNVN(info, inst); | ||
| 799 | break; | ||
| 800 | } | ||
| 801 | default: | ||
| 802 | break; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | |||
| 806 | void Visit(Info& info, IR::Inst& inst) { | ||
| 807 | VisitUsages(info, inst); | ||
| 808 | VisitFpModifiers(info, inst); | ||
| 809 | VisitCbufs(info, inst); | ||
| 810 | } | ||
| 811 | |||
| 812 | void GatherInfoFromHeader(Environment& env, Info& info) { | ||
| 813 | Stage stage{env.ShaderStage()}; | ||
| 814 | if (stage == Stage::Compute) { | ||
| 815 | return; | ||
| 816 | } | ||
| 817 | const auto& header{env.SPH()}; | ||
| 818 | if (stage == Stage::Fragment) { | ||
| 819 | if (!info.loads_indexed_attributes) { | ||
| 820 | return; | ||
| 821 | } | ||
| 822 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 823 | const size_t offset{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 824 | const auto vector{header.ps.imap_generic_vector[index]}; | ||
| 825 | info.loads.mask[offset + 0] = vector.x != PixelImap::Unused; | ||
| 826 | info.loads.mask[offset + 1] = vector.y != PixelImap::Unused; | ||
| 827 | info.loads.mask[offset + 2] = vector.z != PixelImap::Unused; | ||
| 828 | info.loads.mask[offset + 3] = vector.w != PixelImap::Unused; | ||
| 829 | } | ||
| 830 | return; | ||
| 831 | } | ||
| 832 | if (info.loads_indexed_attributes) { | ||
| 833 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 834 | const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; | ||
| 835 | const auto mask = header.vtg.InputGeneric(index); | ||
| 836 | for (size_t i = 0; i < 4; ++i) { | ||
| 837 | info.loads.Set(attribute + i, mask[i]); | ||
| 838 | } | ||
| 839 | } | ||
| 840 | for (size_t index = 0; index < 8; ++index) { | ||
| 841 | const u16 mask{header.vtg.clip_distances}; | ||
| 842 | info.loads.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); | ||
| 843 | } | ||
| 844 | info.loads.Set(IR::Attribute::PrimitiveId, header.vtg.imap_systemb.primitive_array_id != 0); | ||
| 845 | info.loads.Set(IR::Attribute::Layer, header.vtg.imap_systemb.rt_array_index != 0); | ||
| 846 | info.loads.Set(IR::Attribute::ViewportIndex, header.vtg.imap_systemb.viewport_index != 0); | ||
| 847 | info.loads.Set(IR::Attribute::PointSize, header.vtg.imap_systemb.point_size != 0); | ||
| 848 | info.loads.Set(IR::Attribute::PositionX, header.vtg.imap_systemb.position_x != 0); | ||
| 849 | info.loads.Set(IR::Attribute::PositionY, header.vtg.imap_systemb.position_y != 0); | ||
| 850 | info.loads.Set(IR::Attribute::PositionZ, header.vtg.imap_systemb.position_z != 0); | ||
| 851 | info.loads.Set(IR::Attribute::PositionW, header.vtg.imap_systemb.position_w != 0); | ||
| 852 | info.loads.Set(IR::Attribute::PointSpriteS, header.vtg.point_sprite_s != 0); | ||
| 853 | info.loads.Set(IR::Attribute::PointSpriteT, header.vtg.point_sprite_t != 0); | ||
| 854 | info.loads.Set(IR::Attribute::FogCoordinate, header.vtg.fog_coordinate != 0); | ||
| 855 | info.loads.Set(IR::Attribute::TessellationEvaluationPointU, | ||
| 856 | header.vtg.tessellation_eval_point_u != 0); | ||
| 857 | info.loads.Set(IR::Attribute::TessellationEvaluationPointV, | ||
| 858 | header.vtg.tessellation_eval_point_v != 0); | ||
| 859 | info.loads.Set(IR::Attribute::InstanceId, header.vtg.instance_id != 0); | ||
| 860 | info.loads.Set(IR::Attribute::VertexId, header.vtg.vertex_id != 0); | ||
| 861 | // TODO: Legacy varyings | ||
| 862 | } | ||
| 863 | if (info.stores_indexed_attributes) { | ||
| 864 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 865 | const IR::Attribute attribute{IR::Attribute::Generic0X + index * 4}; | ||
| 866 | const auto mask{header.vtg.OutputGeneric(index)}; | ||
| 867 | for (size_t i = 0; i < 4; ++i) { | ||
| 868 | info.stores.Set(attribute + i, mask[i]); | ||
| 869 | } | ||
| 870 | } | ||
| 871 | for (size_t index = 0; index < 8; ++index) { | ||
| 872 | const u16 mask{header.vtg.omap_systemc.clip_distances}; | ||
| 873 | info.stores.Set(IR::Attribute::ClipDistance0 + index, ((mask >> index) & 1) != 0); | ||
| 874 | } | ||
| 875 | info.stores.Set(IR::Attribute::PrimitiveId, | ||
| 876 | header.vtg.omap_systemb.primitive_array_id != 0); | ||
| 877 | info.stores.Set(IR::Attribute::Layer, header.vtg.omap_systemb.rt_array_index != 0); | ||
| 878 | info.stores.Set(IR::Attribute::ViewportIndex, header.vtg.omap_systemb.viewport_index != 0); | ||
| 879 | info.stores.Set(IR::Attribute::PointSize, header.vtg.omap_systemb.point_size != 0); | ||
| 880 | info.stores.Set(IR::Attribute::PositionX, header.vtg.omap_systemb.position_x != 0); | ||
| 881 | info.stores.Set(IR::Attribute::PositionY, header.vtg.omap_systemb.position_y != 0); | ||
| 882 | info.stores.Set(IR::Attribute::PositionZ, header.vtg.omap_systemb.position_z != 0); | ||
| 883 | info.stores.Set(IR::Attribute::PositionW, header.vtg.omap_systemb.position_w != 0); | ||
| 884 | info.stores.Set(IR::Attribute::PointSpriteS, header.vtg.omap_systemc.point_sprite_s != 0); | ||
| 885 | info.stores.Set(IR::Attribute::PointSpriteT, header.vtg.omap_systemc.point_sprite_t != 0); | ||
| 886 | info.stores.Set(IR::Attribute::FogCoordinate, header.vtg.omap_systemc.fog_coordinate != 0); | ||
| 887 | info.stores.Set(IR::Attribute::TessellationEvaluationPointU, | ||
| 888 | header.vtg.omap_systemc.tessellation_eval_point_u != 0); | ||
| 889 | info.stores.Set(IR::Attribute::TessellationEvaluationPointV, | ||
| 890 | header.vtg.omap_systemc.tessellation_eval_point_v != 0); | ||
| 891 | info.stores.Set(IR::Attribute::InstanceId, header.vtg.omap_systemc.instance_id != 0); | ||
| 892 | info.stores.Set(IR::Attribute::VertexId, header.vtg.omap_systemc.vertex_id != 0); | ||
| 893 | // TODO: Legacy varyings | ||
| 894 | } | ||
| 895 | } | ||
| 896 | } // Anonymous namespace | ||
| 897 | |||
| 898 | void CollectShaderInfoPass(Environment& env, IR::Program& program) { | ||
| 899 | Info& info{program.info}; | ||
| 900 | const u32 base{[&] { | ||
| 901 | switch (program.stage) { | ||
| 902 | case Stage::VertexA: | ||
| 903 | case Stage::VertexB: | ||
| 904 | return 0x110u; | ||
| 905 | case Stage::TessellationControl: | ||
| 906 | return 0x210u; | ||
| 907 | case Stage::TessellationEval: | ||
| 908 | return 0x310u; | ||
| 909 | case Stage::Geometry: | ||
| 910 | return 0x410u; | ||
| 911 | case Stage::Fragment: | ||
| 912 | return 0x510u; | ||
| 913 | case Stage::Compute: | ||
| 914 | return 0x310u; | ||
| 915 | } | ||
| 916 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 917 | }()}; | ||
| 918 | info.nvn_buffer_base = base; | ||
| 919 | |||
| 920 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 921 | for (IR::Inst& inst : block->Instructions()) { | ||
| 922 | Visit(info, inst); | ||
| 923 | } | ||
| 924 | } | ||
| 925 | GatherInfoFromHeader(env, info); | ||
| 926 | } | ||
| 927 | |||
| 928 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp new file mode 100644 index 000000000..8dd6d6c2c --- /dev/null +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -0,0 +1,610 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | #include <type_traits> | ||
| 8 | |||
| 9 | #include "common/bit_cast.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 14 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 15 | |||
| 16 | namespace Shader::Optimization { | ||
| 17 | namespace { | ||
| 18 | // Metaprogramming helpers to extract argument information from a lambda | ||
| 19 | template <typename Func> | ||
| 20 | struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {}; | ||
| 21 | |||
| 22 | template <typename ReturnType, typename LambdaType, typename... Args> | ||
| 23 | struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> { | ||
| 24 | template <size_t I> | ||
| 25 | using ArgType = std::tuple_element_t<I, std::tuple<Args...>>; | ||
| 26 | |||
| 27 | static constexpr size_t NUM_ARGS{sizeof...(Args)}; | ||
| 28 | }; | ||
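A minimal demonstration of what these traits recover from a callable, using standard types in place of the codebase's `u32`/`f32` aliases (illustrative only):

```cpp
#include <cstdint>
#include <type_traits>

auto example = [](std::uint32_t a, float b) { return a + static_cast<std::uint32_t>(b); };
using Traits = LambdaTraits<decltype(example)>;
static_assert(Traits::NUM_ARGS == 2);
static_assert(std::is_same_v<Traits::ArgType<0>, std::uint32_t>);
static_assert(std::is_same_v<Traits::ArgType<1>, float>);
```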
| 29 | |||
| 30 | template <typename T> | ||
| 31 | [[nodiscard]] T Arg(const IR::Value& value) { | ||
| 32 | if constexpr (std::is_same_v<T, bool>) { | ||
| 33 | return value.U1(); | ||
| 34 | } else if constexpr (std::is_same_v<T, u32>) { | ||
| 35 | return value.U32(); | ||
| 36 | } else if constexpr (std::is_same_v<T, s32>) { | ||
| 37 | return static_cast<s32>(value.U32()); | ||
| 38 | } else if constexpr (std::is_same_v<T, f32>) { | ||
| 39 | return value.F32(); | ||
| 40 | } else if constexpr (std::is_same_v<T, u64>) { | ||
| 41 | return value.U64(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | |||
| 45 | template <typename T, typename ImmFn> | ||
| 46 | bool FoldCommutative(IR::Inst& inst, ImmFn&& imm_fn) { | ||
| 47 | const IR::Value lhs{inst.Arg(0)}; | ||
| 48 | const IR::Value rhs{inst.Arg(1)}; | ||
| 49 | |||
| 50 | const bool is_lhs_immediate{lhs.IsImmediate()}; | ||
| 51 | const bool is_rhs_immediate{rhs.IsImmediate()}; | ||
| 52 | |||
| 53 | if (is_lhs_immediate && is_rhs_immediate) { | ||
| 54 | const auto result{imm_fn(Arg<T>(lhs), Arg<T>(rhs))}; | ||
| 55 | inst.ReplaceUsesWith(IR::Value{result}); | ||
| 56 | return false; | ||
| 57 | } | ||
| 58 | if (is_lhs_immediate && !is_rhs_immediate) { | ||
| 59 | IR::Inst* const rhs_inst{rhs.InstRecursive()}; | ||
| 60 | if (rhs_inst->GetOpcode() == inst.GetOpcode() && rhs_inst->Arg(1).IsImmediate()) { | ||
| 61 | const auto combined{imm_fn(Arg<T>(lhs), Arg<T>(rhs_inst->Arg(1)))}; | ||
| 62 | inst.SetArg(0, rhs_inst->Arg(0)); | ||
| 63 | inst.SetArg(1, IR::Value{combined}); | ||
| 64 | } else { | ||
| 65 | // Normalize: keep the immediate on the right-hand side | ||
| 66 | inst.SetArg(0, rhs); | ||
| 67 | inst.SetArg(1, lhs); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | if (!is_lhs_immediate && is_rhs_immediate) { | ||
| 71 | const IR::Inst* const lhs_inst{lhs.InstRecursive()}; | ||
| 72 | if (lhs_inst->GetOpcode() == inst.GetOpcode() && lhs_inst->Arg(1).IsImmediate()) { | ||
| 73 | const auto combined{imm_fn(Arg<T>(rhs), Arg<T>(lhs_inst->Arg(1)))}; | ||
| 74 | inst.SetArg(0, lhs_inst->Arg(0)); | ||
| 75 | inst.SetArg(1, IR::Value{combined}); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | return true; | ||
| 79 | } | ||
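In arithmetic terms, the reassociation above merges immediates across chained commutative ops and normalizes a lone leading immediate to the right-hand side; a quick sanity check of both rewrites:

```cpp
// IAdd32(5, IAdd32(x, 3)) -> IAdd32(x, 8); with x = 7 both sides are 15.
static_assert(5u + (7u + 3u) == 7u + 8u);
// IAdd32(5, x) -> IAdd32(x, 5): operands swapped, value unchanged.
static_assert(5u + 7u == 7u + 5u);
```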
| 80 | |||
| 81 | template <typename Func> | ||
| 82 | bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) { | ||
| 83 | if (!inst.AreAllArgsImmediates() || inst.HasAssociatedPseudoOperation()) { | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | using Indices = std::make_index_sequence<LambdaTraits<decltype(func)>::NUM_ARGS>; | ||
| 87 | inst.ReplaceUsesWith(EvalImmediates(inst, func, Indices{})); | ||
| 88 | return true; | ||
| 89 | } | ||
| 90 | |||
| 91 | void FoldGetRegister(IR::Inst& inst) { | ||
| 92 | if (inst.Arg(0).Reg() == IR::Reg::RZ) { | ||
| 93 | inst.ReplaceUsesWith(IR::Value{u32{0}}); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | void FoldGetPred(IR::Inst& inst) { | ||
| 98 | if (inst.Arg(0).Pred() == IR::Pred::PT) { | ||
| 99 | inst.ReplaceUsesWith(IR::Value{true}); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | /// Replaces the pattern generated by two XMAD multiplications | ||
| 104 | bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { | ||
| 105 | /* | ||
| 106 | * We are looking for this pattern: | ||
| 107 | * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 | ||
| 108 | * %rhs_mul = IMul32 %rhs_bfe, %factor_b | ||
| 109 | * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 | ||
| 110 | * %lhs_mul = IMul32 %lhs_bfe, %factor_b | ||
| 111 | * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 | ||
| 112 | * %result = IAdd32 %lhs_shl, %rhs_mul | ||
| 113 | * | ||
| 114 | * And replacing it with: | ||
| 115 | * %result = IMul32 %factor_a, %factor_b | ||
| 116 | * | ||
| 117 | * LLVM and MSVC perform this same transformation, so it is assumed to be safe. | ||
| 118 | */ | ||
| 119 | const IR::Value lhs_arg{inst.Arg(0)}; | ||
| 120 | const IR::Value rhs_arg{inst.Arg(1)}; | ||
| 121 | if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { | ||
| 122 | return false; | ||
| 123 | } | ||
| 124 | IR::Inst* const lhs_shl{lhs_arg.InstRecursive()}; | ||
| 125 | if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || | ||
| 126 | lhs_shl->Arg(1) != IR::Value{16U}) { | ||
| 127 | return false; | ||
| 128 | } | ||
| 129 | if (lhs_shl->Arg(0).IsImmediate()) { | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()}; | ||
| 133 | IR::Inst* const rhs_mul{rhs_arg.InstRecursive()}; | ||
| 134 | if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { | ||
| 135 | return false; | ||
| 136 | } | ||
| 137 | if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) { | ||
| 138 | return false; | ||
| 139 | } | ||
| 140 | const IR::U32 factor_b{lhs_mul->Arg(1)}; | ||
| 141 | if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { | ||
| 142 | return false; | ||
| 143 | } | ||
| 144 | IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()}; | ||
| 145 | IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()}; | ||
| 146 | if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 150 | return false; | ||
| 151 | } | ||
| 152 | if (lhs_bfe->Arg(1) != IR::Value{16U} || lhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 153 | return false; | ||
| 154 | } | ||
| 155 | if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 156 | return false; | ||
| 157 | } | ||
| 158 | if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { | ||
| 159 | return false; | ||
| 160 | } | ||
| 161 | const IR::U32 factor_a{lhs_bfe->Arg(0)}; | ||
| 162 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 163 | inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); | ||
| 164 | return true; | ||
| 165 | } | ||
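The identity this fold relies on: splitting one factor into 16-bit halves and recombining is exact in 32-bit wrapping arithmetic. A self-contained check (illustrative, not part of the patch):

```cpp
#include <cstdint>

constexpr bool XmadIdentity(std::uint32_t a, std::uint32_t b) {
    const std::uint32_t rhs_mul{(a & 0xFFFF) * b};      // low half times b
    const std::uint32_t lhs_shl{((a >> 16) * b) << 16}; // high half, shifted back
    return lhs_shl + rhs_mul == a * b;
}
static_assert(XmadIdentity(0xDEADBEEF, 0x12345678));
static_assert(XmadIdentity(0xFFFFFFFF, 0xFFFFFFFF));
```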
| 166 | |||
| 167 | template <typename T> | ||
| 168 | void FoldAdd(IR::Block& block, IR::Inst& inst) { | ||
| 169 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 170 | return; | ||
| 171 | } | ||
| 172 | if (!FoldCommutative<T>(inst, [](T a, T b) { return a + b; })) { | ||
| 173 | return; | ||
| 174 | } | ||
| 175 | const IR::Value rhs{inst.Arg(1)}; | ||
| 176 | if (rhs.IsImmediate() && Arg<T>(rhs) == 0) { | ||
| 177 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | if constexpr (std::is_same_v<T, u32>) { | ||
| 181 | if (FoldXmadMultiply(block, inst)) { | ||
| 182 | return; | ||
| 183 | } | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | void FoldISub32(IR::Inst& inst) { | ||
| 188 | if (FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a - b; })) { | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | if (inst.Arg(0).IsImmediate() || inst.Arg(1).IsImmediate()) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | // ISub32 is generally used to subtract two constant buffer reads; when both operands | ||
| 195 | // read the same constant buffer value, replace the result with zero. | ||
| 196 | const auto equal_cbuf{[](IR::Inst* a, IR::Inst* b) { | ||
| 197 | return a->GetOpcode() == IR::Opcode::GetCbufU32 && | ||
| 198 | b->GetOpcode() == IR::Opcode::GetCbufU32 && a->Arg(0) == b->Arg(0) && | ||
| 199 | a->Arg(1) == b->Arg(1); | ||
| 200 | }}; | ||
| 201 | IR::Inst* op_a{inst.Arg(0).InstRecursive()}; | ||
| 202 | IR::Inst* op_b{inst.Arg(1).InstRecursive()}; | ||
| 203 | if (equal_cbuf(op_a, op_b)) { | ||
| 204 | inst.ReplaceUsesWith(IR::Value{u32{0}}); | ||
| 205 | return; | ||
| 206 | } | ||
| 207 | // It's also possible a value is being added to a cbuf and then subtracted | ||
| 208 | if (op_b->GetOpcode() == IR::Opcode::IAdd32) { | ||
| 209 | // Canonicalize local variables to simplify the following logic | ||
| 210 | std::swap(op_a, op_b); | ||
| 211 | } | ||
| 212 | if (op_b->GetOpcode() != IR::Opcode::GetCbufU32) { | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | IR::Inst* const inst_cbuf{op_b}; | ||
| 216 | if (op_a->GetOpcode() != IR::Opcode::IAdd32) { | ||
| 217 | return; | ||
| 218 | } | ||
| 219 | IR::Value add_op_a{op_a->Arg(0)}; | ||
| 220 | IR::Value add_op_b{op_a->Arg(1)}; | ||
| 221 | if (add_op_b.IsImmediate()) { | ||
| 222 | // Canonicalize | ||
| 223 | std::swap(add_op_a, add_op_b); | ||
| 224 | } | ||
| 225 | if (add_op_b.IsImmediate()) { | ||
| 226 | return; | ||
| 227 | } | ||
| 228 | IR::Inst* const add_cbuf{add_op_b.InstRecursive()}; | ||
| 229 | if (equal_cbuf(add_cbuf, inst_cbuf)) { | ||
| 230 | inst.ReplaceUsesWith(add_op_a); | ||
| 231 | } | ||
| 232 | } | ||
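In the %-notation used above, the two rewrites this function performs are (sketches):

    %a = GetCbufU32 #index, #offset
    %b = GetCbufU32 #index, #offset
    %r = ISub32 %a, %b                 ; replaced with #0

    %sum = IAdd32 %x, %cbuf
    %r   = ISub32 %sum, %cbuf          ; replaced with %x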
| 233 | |||
| 234 | void FoldSelect(IR::Inst& inst) { | ||
| 235 | const IR::Value cond{inst.Arg(0)}; | ||
| 236 | if (cond.IsImmediate()) { | ||
| 237 | inst.ReplaceUsesWith(cond.U1() ? inst.Arg(1) : inst.Arg(2)); | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | void FoldFPMul32(IR::Inst& inst) { | ||
| 242 | const auto control{inst.Flags<IR::FpControl>()}; | ||
| 243 | if (control.no_contraction) { | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | // Fold the interpolation pattern: (x * attr) * (1 / attr) => x | ||
| 247 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 248 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 249 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||
| 253 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||
| 254 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | ||
| 255 | rhs_op->GetOpcode() != IR::Opcode::FPRecip32) { | ||
| 256 | return; | ||
| 257 | } | ||
| 258 | const IR::Value recip_source{rhs_op->Arg(0)}; | ||
| 259 | const IR::Value lhs_mul_source{lhs_op->Arg(1).Resolve()}; | ||
| 260 | if (recip_source.IsImmediate() || lhs_mul_source.IsImmediate()) { | ||
| 261 | return; | ||
| 262 | } | ||
| 263 | IR::Inst* const attr_a{recip_source.InstRecursive()}; | ||
| 264 | IR::Inst* const attr_b{lhs_mul_source.InstRecursive()}; | ||
| 265 | if (attr_a->GetOpcode() != IR::Opcode::GetAttribute || | ||
| 266 | attr_b->GetOpcode() != IR::Opcode::GetAttribute) { | ||
| 267 | return; | ||
| 268 | } | ||
| 269 | if (attr_a->Arg(0).Attribute() == attr_b->Arg(0).Attribute()) { | ||
| 270 | inst.ReplaceUsesWith(lhs_op->Arg(0)); | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | void FoldLogicalAnd(IR::Inst& inst) { | ||
| 275 | if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a && b; })) { | ||
| 276 | return; | ||
| 277 | } | ||
| 278 | const IR::Value rhs{inst.Arg(1)}; | ||
| 279 | if (rhs.IsImmediate()) { | ||
| 280 | if (rhs.U1()) { | ||
| 281 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 282 | } else { | ||
| 283 | inst.ReplaceUsesWith(IR::Value{false}); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | void FoldLogicalOr(IR::Inst& inst) { | ||
| 289 | if (!FoldCommutative<bool>(inst, [](bool a, bool b) { return a || b; })) { | ||
| 290 | return; | ||
| 291 | } | ||
| 292 | const IR::Value rhs{inst.Arg(1)}; | ||
| 293 | if (rhs.IsImmediate()) { | ||
| 294 | if (rhs.U1()) { | ||
| 295 | inst.ReplaceUsesWith(IR::Value{true}); | ||
| 296 | } else { | ||
| 297 | inst.ReplaceUsesWith(inst.Arg(0)); | ||
| 298 | } | ||
| 299 | } | ||
| 300 | } | ||
| 301 | |||
| 302 | void FoldLogicalNot(IR::Inst& inst) { | ||
| 303 | const IR::U1 value{inst.Arg(0)}; | ||
| 304 | if (value.IsImmediate()) { | ||
| 305 | inst.ReplaceUsesWith(IR::Value{!value.U1()}); | ||
| 306 | return; | ||
| 307 | } | ||
| 308 | IR::Inst* const arg{value.InstRecursive()}; | ||
| 309 | if (arg->GetOpcode() == IR::Opcode::LogicalNot) { | ||
| 310 | inst.ReplaceUsesWith(arg->Arg(0)); | ||
| 311 | } | ||
| 312 | } | ||
| 313 | |||
| 314 | template <IR::Opcode op, typename Dest, typename Source> | ||
| 315 | void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | ||
| 316 | const IR::Value value{inst.Arg(0)}; | ||
| 317 | if (value.IsImmediate()) { | ||
| 318 | inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))}); | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | IR::Inst* const arg_inst{value.InstRecursive()}; | ||
| 322 | if (arg_inst->GetOpcode() == reverse) { | ||
| 323 | inst.ReplaceUsesWith(arg_inst->Arg(0)); | ||
| 324 | return; | ||
| 325 | } | ||
| 326 | if constexpr (op == IR::Opcode::BitCastF32U32) { | ||
| 327 | if (arg_inst->GetOpcode() == IR::Opcode::GetCbufU32) { | ||
| 328 | // Replace the bitcast with a typed constant buffer read | ||
| 329 | inst.ReplaceOpcode(IR::Opcode::GetCbufF32); | ||
| 330 | inst.SetArg(0, arg_inst->Arg(0)); | ||
| 331 | inst.SetArg(1, arg_inst->Arg(1)); | ||
| 332 | return; | ||
| 333 | } | ||
| 334 | } | ||
| 335 | } | ||
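Common::BitCast is yuzu's bit-reinterpreting helper from common/; assuming the usual memcpy-based definition, a self-contained sketch of it looks like:

    #include <cstring>
    #include <type_traits>

    template <typename Dest, typename Source>
    Dest BitCast(const Source& source) {
        static_assert(sizeof(Dest) == sizeof(Source), "sizes must match");
        static_assert(std::is_trivially_copyable_v<Dest> && std::is_trivially_copyable_v<Source>);
        Dest dest;
        std::memcpy(&dest, &source, sizeof(dest));  // reinterpret the bits without UB
        return dest;
    }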
| 336 | |||
| 337 | void FoldInverseFunc(IR::Inst& inst, IR::Opcode reverse) { | ||
| 338 | const IR::Value value{inst.Arg(0)}; | ||
| 339 | if (value.IsImmediate()) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | IR::Inst* const arg_inst{value.InstRecursive()}; | ||
| 343 | if (arg_inst->GetOpcode() == reverse) { | ||
| 344 | inst.ReplaceUsesWith(arg_inst->Arg(0)); | ||
| 345 | return; | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | template <typename Func, size_t... I> | ||
| 350 | IR::Value EvalImmediates(const IR::Inst& inst, Func&& func, std::index_sequence<I...>) { | ||
| 351 | using Traits = LambdaTraits<decltype(func)>; | ||
| 352 | return IR::Value{func(Arg<typename Traits::template ArgType<I>>(inst.Arg(I))...)}; | ||
| 353 | } | ||
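EvalImmediates uses LambdaTraits, declared earlier in this file, to recover each parameter type of the folding lambda. A typical implementation of such a trait (a sketch; the in-tree definition may differ) is:

    #include <cstddef>
    #include <tuple>
    #include <type_traits>

    template <typename Func>
    struct LambdaTraits : LambdaTraits<decltype(&std::remove_reference_t<Func>::operator())> {};

    // Peel the parameter list off the lambda's call operator
    template <typename ReturnType, typename LambdaType, typename... Args>
    struct LambdaTraits<ReturnType (LambdaType::*)(Args...) const> {
        template <size_t I>
        using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
        static constexpr size_t NUM_ARGS{sizeof...(Args)};
    };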
| 354 | |||
| 355 | std::optional<IR::Value> FoldCompositeExtractImpl(IR::Value inst_value, IR::Opcode insert, | ||
| 356 | IR::Opcode construct, u32 first_index) { | ||
| 357 | IR::Inst* const inst{inst_value.InstRecursive()}; | ||
| 358 | if (inst->GetOpcode() == construct) { | ||
| 359 | return inst->Arg(first_index); | ||
| 360 | } | ||
| 361 | if (inst->GetOpcode() != insert) { | ||
| 362 | return std::nullopt; | ||
| 363 | } | ||
| 364 | IR::Value value_index{inst->Arg(2)}; | ||
| 365 | if (!value_index.IsImmediate()) { | ||
| 366 | return std::nullopt; | ||
| 367 | } | ||
| 368 | const u32 second_index{value_index.U32()}; | ||
| 369 | if (first_index != second_index) { | ||
| 370 | IR::Value value_composite{inst->Arg(0)}; | ||
| 371 | if (value_composite.IsImmediate()) { | ||
| 372 | return std::nullopt; | ||
| 373 | } | ||
| 374 | return FoldCompositeExtractImpl(value_composite, insert, construct, first_index); | ||
| 375 | } | ||
| 376 | return inst->Arg(1); | ||
| 377 | } | ||
| 378 | |||
| 379 | void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode insert) { | ||
| 380 | const IR::Value value_1{inst.Arg(0)}; | ||
| 381 | const IR::Value value_2{inst.Arg(1)}; | ||
| 382 | if (value_1.IsImmediate()) { | ||
| 383 | return; | ||
| 384 | } | ||
| 385 | if (!value_2.IsImmediate()) { | ||
| 386 | return; | ||
| 387 | } | ||
| 388 | const u32 first_index{value_2.U32()}; | ||
| 389 | const std::optional result{FoldCompositeExtractImpl(value_1, insert, construct, first_index)}; | ||
| 390 | if (!result) { | ||
| 391 | return; | ||
| 392 | } | ||
| 393 | inst.ReplaceUsesWith(*result); | ||
| 394 | } | ||
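To make the recursion concrete, an extract from an insert either hits the inserted element or walks back to the underlying composite (sketch in the same %-notation):

    %vec = CompositeConstructU32x2 %x, %y
    %ins = CompositeInsertU32x2 %vec, %z, #1
    %e1  = CompositeExtractU32x2 %ins, #1      ; replaced with %z (indices match)
    %e0  = CompositeExtractU32x2 %ins, #0      ; recurses into %vec, replaced with %x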
| 395 | |||
| 396 | IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { | ||
| 397 | if (value.IsImmediate()) { | ||
| 398 | return value; | ||
| 399 | } | ||
| 400 | IR::Inst* const inst{value.InstRecursive()}; | ||
| 401 | if (inst->GetOpcode() == expected_cast) { | ||
| 402 | return inst->Arg(0).Resolve(); | ||
| 403 | } | ||
| 404 | return value; | ||
| 405 | } | ||
| 406 | |||
| 407 | void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||
| 408 | const IR::Value swizzle{inst.Arg(2)}; | ||
| 409 | if (!swizzle.IsImmediate()) { | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | const IR::Value value_1{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 413 | const IR::Value value_2{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 414 | if (value_1.IsImmediate()) { | ||
| 415 | return; | ||
| 416 | } | ||
| 417 | const u32 swizzle_value{swizzle.U32()}; | ||
| 418 | if (swizzle_value != 0x99 && swizzle_value != 0xA5) { | ||
| 419 | return; | ||
| 420 | } | ||
| 421 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 422 | if (inst2->GetOpcode() != IR::Opcode::ShuffleButterfly) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | ||
| 426 | if (value_2 != value_3) { | ||
| 427 | return; | ||
| 428 | } | ||
| 429 | const IR::Value index{inst2->Arg(1)}; | ||
| 430 | const IR::Value clamp{inst2->Arg(2)}; | ||
| 431 | const IR::Value segmentation_mask{inst2->Arg(3)}; | ||
| 432 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 433 | return; | ||
| 434 | } | ||
| 435 | if (clamp.U32() != 3 || segmentation_mask.U32() != 28) { | ||
| 436 | return; | ||
| 437 | } | ||
| 438 | if (swizzle_value == 0x99) { | ||
| 439 | // DPdxFine | ||
| 440 | if (index.U32() == 1) { | ||
| 441 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 442 | inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{inst.Arg(1)})); | ||
| 443 | } | ||
| 444 | } else if (swizzle_value == 0xA5) { | ||
| 445 | // DPdyFine | ||
| 446 | if (index.U32() == 2) { | ||
| 447 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 448 | inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{inst.Arg(1)})); | ||
| 449 | } | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||
| 454 | switch (inst.GetOpcode()) { | ||
| 455 | case IR::Opcode::GetRegister: | ||
| 456 | return FoldGetRegister(inst); | ||
| 457 | case IR::Opcode::GetPred: | ||
| 458 | return FoldGetPred(inst); | ||
| 459 | case IR::Opcode::IAdd32: | ||
| 460 | return FoldAdd<u32>(block, inst); | ||
| 461 | case IR::Opcode::ISub32: | ||
| 462 | return FoldISub32(inst); | ||
| 463 | case IR::Opcode::IMul32: | ||
| 464 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; }); | ||
| 465 | return; | ||
| 466 | case IR::Opcode::ShiftRightArithmetic32: | ||
| 467 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); }); | ||
| 468 | return; | ||
| 469 | case IR::Opcode::BitCastF32U32: | ||
| 470 | return FoldBitCast<IR::Opcode::BitCastF32U32, f32, u32>(inst, IR::Opcode::BitCastU32F32); | ||
| 471 | case IR::Opcode::BitCastU32F32: | ||
| 472 | return FoldBitCast<IR::Opcode::BitCastU32F32, u32, f32>(inst, IR::Opcode::BitCastF32U32); | ||
| 473 | case IR::Opcode::IAdd64: | ||
| 474 | return FoldAdd<u64>(block, inst); | ||
| 475 | case IR::Opcode::PackHalf2x16: | ||
| 476 | return FoldInverseFunc(inst, IR::Opcode::UnpackHalf2x16); | ||
| 477 | case IR::Opcode::UnpackHalf2x16: | ||
| 478 | return FoldInverseFunc(inst, IR::Opcode::PackHalf2x16); | ||
| 479 | case IR::Opcode::SelectU1: | ||
| 480 | case IR::Opcode::SelectU8: | ||
| 481 | case IR::Opcode::SelectU16: | ||
| 482 | case IR::Opcode::SelectU32: | ||
| 483 | case IR::Opcode::SelectU64: | ||
| 484 | case IR::Opcode::SelectF16: | ||
| 485 | case IR::Opcode::SelectF32: | ||
| 486 | case IR::Opcode::SelectF64: | ||
| 487 | return FoldSelect(inst); | ||
| 488 | case IR::Opcode::FPMul32: | ||
| 489 | return FoldFPMul32(inst); | ||
| 490 | case IR::Opcode::LogicalAnd: | ||
| 491 | return FoldLogicalAnd(inst); | ||
| 492 | case IR::Opcode::LogicalOr: | ||
| 493 | return FoldLogicalOr(inst); | ||
| 494 | case IR::Opcode::LogicalNot: | ||
| 495 | return FoldLogicalNot(inst); | ||
| 496 | case IR::Opcode::SLessThan: | ||
| 497 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); | ||
| 498 | return; | ||
| 499 | case IR::Opcode::ULessThan: | ||
| 500 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); | ||
| 501 | return; | ||
| 502 | case IR::Opcode::SLessThanEqual: | ||
| 503 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); | ||
| 504 | return; | ||
| 505 | case IR::Opcode::ULessThanEqual: | ||
| 506 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a <= b; }); | ||
| 507 | return; | ||
| 508 | case IR::Opcode::SGreaterThan: | ||
| 509 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a > b; }); | ||
| 510 | return; | ||
| 511 | case IR::Opcode::UGreaterThan: | ||
| 512 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a > b; }); | ||
| 513 | return; | ||
| 514 | case IR::Opcode::SGreaterThanEqual: | ||
| 515 | FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a >= b; }); | ||
| 516 | return; | ||
| 517 | case IR::Opcode::UGreaterThanEqual: | ||
| 518 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a >= b; }); | ||
| 519 | return; | ||
| 520 | case IR::Opcode::IEqual: | ||
| 521 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a == b; }); | ||
| 522 | return; | ||
| 523 | case IR::Opcode::INotEqual: | ||
| 524 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a != b; }); | ||
| 525 | return; | ||
| 526 | case IR::Opcode::BitwiseAnd32: | ||
| 527 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a & b; }); | ||
| 528 | return; | ||
| 529 | case IR::Opcode::BitwiseOr32: | ||
| 530 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a | b; }); | ||
| 531 | return; | ||
| 532 | case IR::Opcode::BitwiseXor32: | ||
| 533 | FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a ^ b; }); | ||
| 534 | return; | ||
| 535 | case IR::Opcode::BitFieldUExtract: | ||
| 536 | FoldWhenAllImmediates(inst, [](u32 base, u32 shift, u32 count) { | ||
| 537 | if (static_cast<size_t>(shift) + static_cast<size_t>(count) > 32) { | ||
| 538 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldUExtract, | ||
| 539 | base, shift, count); | ||
| 540 | } | ||
| 541 | return (base >> shift) & ((1U << count) - 1); | ||
| 542 | }); | ||
| 543 | return; | ||
| 544 | case IR::Opcode::BitFieldSExtract: | ||
| 545 | FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) { | ||
| 546 | const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)}; | ||
| 547 | const size_t left_shift{32 - back_shift}; | ||
| 548 | const size_t right_shift{static_cast<size_t>(32 - count)}; | ||
| 549 | if (back_shift > 32 || left_shift >= 32 || right_shift >= 32) { | ||
| 550 | throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract, | ||
| 551 | base, shift, count); | ||
| 552 | } | ||
| 553 | return static_cast<u32>((base << left_shift) >> right_shift); | ||
| 554 | }); | ||
| 555 | return; | ||
| 556 | case IR::Opcode::BitFieldInsert: | ||
| 557 | FoldWhenAllImmediates(inst, [](u32 base, u32 insert, u32 offset, u32 bits) { | ||
| 558 | if (bits >= 32 || offset >= 32) { | ||
| 559 | throw LogicError("Undefined result in {}({}, {}, {}, {})", | ||
| 560 | IR::Opcode::BitFieldInsert, base, insert, offset, bits); | ||
| 561 | } | ||
| 562 | return (base & ~(~(~0u << bits) << offset)) | (insert << offset); | ||
| 563 | }); | ||
| 564 | return; | ||
| 565 | case IR::Opcode::CompositeExtractU32x2: | ||
| 566 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x2, | ||
| 567 | IR::Opcode::CompositeInsertU32x2); | ||
| 568 | case IR::Opcode::CompositeExtractU32x3: | ||
| 569 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x3, | ||
| 570 | IR::Opcode::CompositeInsertU32x3); | ||
| 571 | case IR::Opcode::CompositeExtractU32x4: | ||
| 572 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructU32x4, | ||
| 573 | IR::Opcode::CompositeInsertU32x4); | ||
| 574 | case IR::Opcode::CompositeExtractF32x2: | ||
| 575 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x2, | ||
| 576 | IR::Opcode::CompositeInsertF32x2); | ||
| 577 | case IR::Opcode::CompositeExtractF32x3: | ||
| 578 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x3, | ||
| 579 | IR::Opcode::CompositeInsertF32x3); | ||
| 580 | case IR::Opcode::CompositeExtractF32x4: | ||
| 581 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF32x4, | ||
| 582 | IR::Opcode::CompositeInsertF32x4); | ||
| 583 | case IR::Opcode::CompositeExtractF16x2: | ||
| 584 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x2, | ||
| 585 | IR::Opcode::CompositeInsertF16x2); | ||
| 586 | case IR::Opcode::CompositeExtractF16x3: | ||
| 587 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x3, | ||
| 588 | IR::Opcode::CompositeInsertF16x3); | ||
| 589 | case IR::Opcode::CompositeExtractF16x4: | ||
| 590 | return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, | ||
| 591 | IR::Opcode::CompositeInsertF16x4); | ||
| 592 | case IR::Opcode::FSwizzleAdd: | ||
| 593 | return FoldFSwizzleAdd(block, inst); | ||
| 594 | default: | ||
| 595 | break; | ||
| 596 | } | ||
| 597 | } | ||
| 598 | } // Anonymous namespace | ||
| 599 | |||
| 600 | void ConstantPropagationPass(IR::Program& program) { | ||
| 601 | const auto end{program.post_order_blocks.rend()}; | ||
| 602 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||
| 603 | IR::Block* const block{*it}; | ||
| 604 | for (IR::Inst& inst : block->Instructions()) { | ||
| 605 | ConstantPropagation(*block, inst); | ||
| 606 | } | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp new file mode 100644 index 000000000..400836301 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dead_code_elimination_pass.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 8 | |||
| 9 | namespace Shader::Optimization { | ||
| 10 | |||
| 11 | void DeadCodeEliminationPass(IR::Program& program) { | ||
| 12 | // We iterate over the instructions in reverse order. | ||
| 13 | // This is because removing an instruction reduces the number of uses for earlier instructions. | ||
| 14 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 15 | auto it{block->end()}; | ||
| 16 | while (it != block->begin()) { | ||
| 17 | --it; | ||
| 18 | if (!it->HasUses() && !it->MayHaveSideEffects()) { | ||
| 19 | it->Invalidate(); | ||
| 20 | it = block->Instructions().erase(it); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp new file mode 100644 index 000000000..055ba9c54 --- /dev/null +++ b/src/shader_recompiler/ir_opt/dual_vertex_pass.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 6 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 7 | |||
| 8 | namespace Shader::Optimization { | ||
| 9 | |||
| 10 | void VertexATransformPass(IR::Program& program) { | ||
| 11 | for (IR::Block* const block : program.blocks) { | ||
| 12 | for (IR::Inst& inst : block->Instructions()) { | ||
| 13 | if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||
| 14 | return inst.Invalidate(); | ||
| 15 | } | ||
| 16 | } | ||
| 17 | } | ||
| 18 | } | ||
| 19 | |||
| 20 | void VertexBTransformPass(IR::Program& program) { | ||
| 21 | for (IR::Block* const block : program.blocks) { | ||
| 22 | for (IR::Inst& inst : block->Instructions()) { | ||
| 23 | if (inst.GetOpcode() == IR::Opcode::Prologue) { | ||
| 24 | return inst.Invalidate(); | ||
| 25 | } | ||
| 26 | } | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp new file mode 100644 index 000000000..4197b0095 --- /dev/null +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -0,0 +1,526 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <compare> | ||
| 7 | #include <optional> | ||
| 8 | #include <queue> | ||
| 9 | |||
| 10 | #include <boost/container/flat_set.hpp> | ||
| 11 | #include <boost/container/small_vector.hpp> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 15 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||
| 16 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 17 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 18 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 19 | |||
| 20 | namespace Shader::Optimization { | ||
| 21 | namespace { | ||
| 22 | /// Address within the constant buffers where a storage buffer descriptor is stored | ||
| 23 | struct StorageBufferAddr { | ||
| 24 | auto operator<=>(const StorageBufferAddr&) const noexcept = default; | ||
| 25 | |||
| 26 | u32 index; | ||
| 27 | u32 offset; | ||
| 28 | }; | ||
| 29 | |||
| 30 | /// Block iterator to a global memory instruction and the storage buffer it uses | ||
| 31 | struct StorageInst { | ||
| 32 | StorageBufferAddr storage_buffer; | ||
| 33 | IR::Inst* inst; | ||
| 34 | IR::Block* block; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /// Bias towards a certain range of constant buffers when looking for storage buffers | ||
| 38 | struct Bias { | ||
| 39 | u32 index; | ||
| 40 | u32 offset_begin; | ||
| 41 | u32 offset_end; | ||
| 42 | }; | ||
| 43 | |||
| 44 | using boost::container::flat_set; | ||
| 45 | using boost::container::small_vector; | ||
| 46 | using StorageBufferSet = | ||
| 47 | flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; | ||
| 48 | using StorageInstVector = small_vector<StorageInst, 24>; | ||
| 49 | using StorageWritesSet = | ||
| 50 | flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>; | ||
| 51 | |||
| 52 | struct StorageInfo { | ||
| 53 | StorageBufferSet set; | ||
| 54 | StorageInstVector to_replace; | ||
| 55 | StorageWritesSet writes; | ||
| 56 | }; | ||
| 57 | |||
| 58 | /// Returns true when the instruction is a global memory instruction | ||
| 59 | bool IsGlobalMemory(const IR::Inst& inst) { | ||
| 60 | switch (inst.GetOpcode()) { | ||
| 61 | case IR::Opcode::LoadGlobalS8: | ||
| 62 | case IR::Opcode::LoadGlobalU8: | ||
| 63 | case IR::Opcode::LoadGlobalS16: | ||
| 64 | case IR::Opcode::LoadGlobalU16: | ||
| 65 | case IR::Opcode::LoadGlobal32: | ||
| 66 | case IR::Opcode::LoadGlobal64: | ||
| 67 | case IR::Opcode::LoadGlobal128: | ||
| 68 | case IR::Opcode::WriteGlobalS8: | ||
| 69 | case IR::Opcode::WriteGlobalU8: | ||
| 70 | case IR::Opcode::WriteGlobalS16: | ||
| 71 | case IR::Opcode::WriteGlobalU16: | ||
| 72 | case IR::Opcode::WriteGlobal32: | ||
| 73 | case IR::Opcode::WriteGlobal64: | ||
| 74 | case IR::Opcode::WriteGlobal128: | ||
| 75 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 76 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 77 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 78 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 79 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 80 | case IR::Opcode::GlobalAtomicInc32: | ||
| 81 | case IR::Opcode::GlobalAtomicDec32: | ||
| 82 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 83 | case IR::Opcode::GlobalAtomicOr32: | ||
| 84 | case IR::Opcode::GlobalAtomicXor32: | ||
| 85 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 86 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 87 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 88 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 89 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 90 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 91 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 92 | case IR::Opcode::GlobalAtomicOr64: | ||
| 93 | case IR::Opcode::GlobalAtomicXor64: | ||
| 94 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 95 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 96 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 97 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 98 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 99 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 100 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 101 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 102 | return true; | ||
| 103 | default: | ||
| 104 | return false; | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | /// Returns true when the instruction writes to global memory | ||
| 109 | bool IsGlobalMemoryWrite(const IR::Inst& inst) { | ||
| 110 | switch (inst.GetOpcode()) { | ||
| 111 | case IR::Opcode::WriteGlobalS8: | ||
| 112 | case IR::Opcode::WriteGlobalU8: | ||
| 113 | case IR::Opcode::WriteGlobalS16: | ||
| 114 | case IR::Opcode::WriteGlobalU16: | ||
| 115 | case IR::Opcode::WriteGlobal32: | ||
| 116 | case IR::Opcode::WriteGlobal64: | ||
| 117 | case IR::Opcode::WriteGlobal128: | ||
| 118 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 119 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 120 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 121 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 122 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 123 | case IR::Opcode::GlobalAtomicInc32: | ||
| 124 | case IR::Opcode::GlobalAtomicDec32: | ||
| 125 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 126 | case IR::Opcode::GlobalAtomicOr32: | ||
| 127 | case IR::Opcode::GlobalAtomicXor32: | ||
| 128 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 129 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 130 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 131 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 132 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 133 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 134 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 135 | case IR::Opcode::GlobalAtomicOr64: | ||
| 136 | case IR::Opcode::GlobalAtomicXor64: | ||
| 137 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 138 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 139 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 140 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 141 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 142 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 143 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 144 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 145 | return true; | ||
| 146 | default: | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | /// Converts a global memory opcode to its storage buffer equivalent | ||
| 152 | IR::Opcode GlobalToStorage(IR::Opcode opcode) { | ||
| 153 | switch (opcode) { | ||
| 154 | case IR::Opcode::LoadGlobalS8: | ||
| 155 | return IR::Opcode::LoadStorageS8; | ||
| 156 | case IR::Opcode::LoadGlobalU8: | ||
| 157 | return IR::Opcode::LoadStorageU8; | ||
| 158 | case IR::Opcode::LoadGlobalS16: | ||
| 159 | return IR::Opcode::LoadStorageS16; | ||
| 160 | case IR::Opcode::LoadGlobalU16: | ||
| 161 | return IR::Opcode::LoadStorageU16; | ||
| 162 | case IR::Opcode::LoadGlobal32: | ||
| 163 | return IR::Opcode::LoadStorage32; | ||
| 164 | case IR::Opcode::LoadGlobal64: | ||
| 165 | return IR::Opcode::LoadStorage64; | ||
| 166 | case IR::Opcode::LoadGlobal128: | ||
| 167 | return IR::Opcode::LoadStorage128; | ||
| 168 | case IR::Opcode::WriteGlobalS8: | ||
| 169 | return IR::Opcode::WriteStorageS8; | ||
| 170 | case IR::Opcode::WriteGlobalU8: | ||
| 171 | return IR::Opcode::WriteStorageU8; | ||
| 172 | case IR::Opcode::WriteGlobalS16: | ||
| 173 | return IR::Opcode::WriteStorageS16; | ||
| 174 | case IR::Opcode::WriteGlobalU16: | ||
| 175 | return IR::Opcode::WriteStorageU16; | ||
| 176 | case IR::Opcode::WriteGlobal32: | ||
| 177 | return IR::Opcode::WriteStorage32; | ||
| 178 | case IR::Opcode::WriteGlobal64: | ||
| 179 | return IR::Opcode::WriteStorage64; | ||
| 180 | case IR::Opcode::WriteGlobal128: | ||
| 181 | return IR::Opcode::WriteStorage128; | ||
| 182 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 183 | return IR::Opcode::StorageAtomicIAdd32; | ||
| 184 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 185 | return IR::Opcode::StorageAtomicSMin32; | ||
| 186 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 187 | return IR::Opcode::StorageAtomicUMin32; | ||
| 188 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 189 | return IR::Opcode::StorageAtomicSMax32; | ||
| 190 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 191 | return IR::Opcode::StorageAtomicUMax32; | ||
| 192 | case IR::Opcode::GlobalAtomicInc32: | ||
| 193 | return IR::Opcode::StorageAtomicInc32; | ||
| 194 | case IR::Opcode::GlobalAtomicDec32: | ||
| 195 | return IR::Opcode::StorageAtomicDec32; | ||
| 196 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 197 | return IR::Opcode::StorageAtomicAnd32; | ||
| 198 | case IR::Opcode::GlobalAtomicOr32: | ||
| 199 | return IR::Opcode::StorageAtomicOr32; | ||
| 200 | case IR::Opcode::GlobalAtomicXor32: | ||
| 201 | return IR::Opcode::StorageAtomicXor32; | ||
| 202 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 203 | return IR::Opcode::StorageAtomicIAdd64; | ||
| 204 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 205 | return IR::Opcode::StorageAtomicSMin64; | ||
| 206 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 207 | return IR::Opcode::StorageAtomicUMin64; | ||
| 208 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 209 | return IR::Opcode::StorageAtomicSMax64; | ||
| 210 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 211 | return IR::Opcode::StorageAtomicUMax64; | ||
| 212 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 213 | return IR::Opcode::StorageAtomicAnd64; | ||
| 214 | case IR::Opcode::GlobalAtomicOr64: | ||
| 215 | return IR::Opcode::StorageAtomicOr64; | ||
| 216 | case IR::Opcode::GlobalAtomicXor64: | ||
| 217 | return IR::Opcode::StorageAtomicXor64; | ||
| 218 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 219 | return IR::Opcode::StorageAtomicExchange32; | ||
| 220 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 221 | return IR::Opcode::StorageAtomicExchange64; | ||
| 222 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 223 | return IR::Opcode::StorageAtomicAddF32; | ||
| 224 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 225 | return IR::Opcode::StorageAtomicAddF16x2; | ||
| 226 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 227 | return IR::Opcode::StorageAtomicMinF16x2; | ||
| 228 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 229 | return IR::Opcode::StorageAtomicMaxF16x2; | ||
| 230 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 231 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 232 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 233 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 234 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 235 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 236 | default: | ||
| 237 | throw InvalidArgument("Invalid global memory opcode {}", opcode); | ||
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 | /// Returns true when a storage buffer address satisfies a bias | ||
| 242 | bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept { | ||
| 243 | return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin && | ||
| 244 | storage_buffer.offset < bias.offset_end; | ||
| 245 | } | ||
| 246 | |||
| 247 | struct LowAddrInfo { | ||
| 248 | IR::U32 value; | ||
| 249 | s32 imm_offset; | ||
| 250 | }; | ||
| 251 | |||
| 252 | /// Tries to track the first 32-bits of a global memory instruction | ||
| 253 | std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { | ||
| 254 | // The first argument is the low-level GPU pointer used by the global memory instruction | ||
| 255 | const IR::Value addr{inst->Arg(0)}; | ||
| 256 | if (addr.IsImmediate()) { | ||
| 257 | // Not much we can do if it's an immediate | ||
| 258 | return std::nullopt; | ||
| 259 | } | ||
| 260 | // This address is expected to be either a PackUint2x32, an IAdd64, or a CompositeConstructU32x2 | ||
| 261 | IR::Inst* addr_inst{addr.InstRecursive()}; | ||
| 262 | s32 imm_offset{0}; | ||
| 263 | if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) { | ||
| 264 | // If it's an IAdd64, grab the immediate offset it applies along with the address | ||
| 265 | // instruction. The instruction is expected to be canonicalized, with the address in | ||
| 266 | // the first argument and the immediate offset in the second. | ||
| 267 | const IR::U64 imm_offset_value{addr_inst->Arg(1)}; | ||
| 268 | if (!imm_offset_value.IsImmediate()) { | ||
| 269 | return std::nullopt; | ||
| 270 | } | ||
| 271 | imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64())); | ||
| 272 | const IR::U64 iadd_addr{addr_inst->Arg(0)}; | ||
| 273 | if (iadd_addr.IsImmediate()) { | ||
| 274 | return std::nullopt; | ||
| 275 | } | ||
| 276 | addr_inst = iadd_addr.InstRecursive(); | ||
| 277 | } | ||
| 278 | // With IAdd64 handled, now PackUint2x32 is expected | ||
| 279 | if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) { | ||
| 280 | // PackUint2x32 is expected to be generated from a vector | ||
| 281 | const IR::Value vector{addr_inst->Arg(0)}; | ||
| 282 | if (vector.IsImmediate()) { | ||
| 283 | return std::nullopt; | ||
| 284 | } | ||
| 285 | addr_inst = vector.InstRecursive(); | ||
| 286 | } | ||
| 287 | // The vector is expected to be a CompositeConstructU32x2 | ||
| 288 | if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) { | ||
| 289 | return std::nullopt; | ||
| 290 | } | ||
| 291 | // Grab the first argument from the CompositeConstructU32x2; this is the low address. | ||
| 292 | return LowAddrInfo{ | ||
| 293 | .value{IR::U32{addr_inst->Arg(0)}}, | ||
| 294 | .imm_offset = imm_offset, | ||
| 295 | }; | ||
| 296 | } | ||
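The address chain this walks, from the global memory instruction back to its low 32 bits, typically looks like (an editor's sketch):

    %lo   = ...                               ; low 32 bits of the pointer
    %hi   = ...                               ; high 32 bits
    %vec  = CompositeConstructU32x2 %lo, %hi
    %addr = PackUint2x32 %vec
    %ptr  = IAdd64 %addr, #imm_offset         ; the IAdd64 is optional
    LoadGlobal32 %ptr                         ; TrackLowAddress returns {%lo, imm_offset}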
| 297 | |||
| 298 | /// Tries to track the storage buffer address used by a global memory instruction | ||
| 299 | std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { | ||
| 300 | const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { | ||
| 301 | if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { | ||
| 302 | return std::nullopt; | ||
| 303 | } | ||
| 304 | const IR::Value index{inst->Arg(0)}; | ||
| 305 | const IR::Value offset{inst->Arg(1)}; | ||
| 306 | if (!index.IsImmediate()) { | ||
| 307 | // Definitely not a storage buffer if it's read from a | ||
| 308 | // non-immediate index | ||
| 309 | return std::nullopt; | ||
| 310 | } | ||
| 311 | if (!offset.IsImmediate()) { | ||
| 312 | // TODO: Support SSBO arrays | ||
| 313 | return std::nullopt; | ||
| 314 | } | ||
| 315 | const StorageBufferAddr storage_buffer{ | ||
| 316 | .index = index.U32(), | ||
| 317 | .offset = offset.U32(), | ||
| 318 | }; | ||
| 319 | if (!Common::IsAligned(storage_buffer.offset, 16)) { | ||
| 320 | // The SSBO pointer has to be aligned | ||
| 321 | return std::nullopt; | ||
| 322 | } | ||
| 323 | if (bias && !MeetsBias(storage_buffer, *bias)) { | ||
| 324 | // Discard addresses outside the bias range, in case tracking | ||
| 325 | // wrongly points to them | ||
| 326 | return std::nullopt; | ||
| 327 | } | ||
| 328 | return storage_buffer; | ||
| 329 | }}; | ||
| 330 | return BreadthFirstSearch(value, pred); | ||
| 331 | } | ||
| 332 | |||
| 333 | /// Collects the storage buffer used by a global memory instruction and the instruction itself | ||
| 334 | void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) { | ||
| 335 | // NVN puts storage buffers in a specific range; bias towards these addresses to | ||
| 336 | // avoid false positives | ||
| 337 | static constexpr Bias nvn_bias{ | ||
| 338 | .index = 0, | ||
| 339 | .offset_begin = 0x110, | ||
| 340 | .offset_end = 0x610, | ||
| 341 | }; | ||
| 342 | // Track the low address of the instruction | ||
| 343 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; | ||
| 344 | if (!low_addr_info) { | ||
| 345 | // Failed to track the low address, use NVN fallbacks | ||
| 346 | return; | ||
| 347 | } | ||
| 348 | // First try to find storage buffers in the NVN address | ||
| 349 | const IR::U32 low_addr{low_addr_info->value}; | ||
| 350 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; | ||
| 351 | if (!storage_buffer) { | ||
| 352 | // If it fails, track without a bias | ||
| 353 | storage_buffer = Track(low_addr, nullptr); | ||
| 354 | if (!storage_buffer) { | ||
| 355 | // If that also fails, use NVN fallbacks | ||
| 356 | return; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | // Collect storage buffer and the instruction | ||
| 360 | if (IsGlobalMemoryWrite(inst)) { | ||
| 361 | info.writes.insert(*storage_buffer); | ||
| 362 | } | ||
| 363 | info.set.insert(*storage_buffer); | ||
| 364 | info.to_replace.push_back(StorageInst{ | ||
| 365 | .storage_buffer{*storage_buffer}, | ||
| 366 | .inst = &inst, | ||
| 367 | .block = &block, | ||
| 368 | }); | ||
| 369 | } | ||
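The NVN bias window covers 0x610 - 0x110 = 0x500 bytes of constant buffer 0; combined with the 16-byte alignment Track enforces, that leaves 80 candidate descriptor slots (editor's arithmetic):

    static_assert((0x610 - 0x110) / 16 == 80);  // 16-byte aligned slots in the bias window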
| 370 | |||
| 371 | /// Returns the offset in bytes for an equivalent storage instruction | ||
| 372 | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { | ||
| 373 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 374 | IR::U32 offset; | ||
| 375 | if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { | ||
| 376 | offset = low_addr->value; | ||
| 377 | if (low_addr->imm_offset != 0) { | ||
| 378 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | ||
| 379 | } | ||
| 380 | } else { | ||
| 381 | offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); | ||
| 382 | } | ||
| 383 | // Subtract the low 32 bits of the storage buffer base address (read from the constant | ||
| 384 | // buffer) from the guest address. The result is the storage buffer offset in bytes. | ||
| 385 | const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | ||
| 386 | return ir.ISub(offset, low_cbuf); | ||
| 387 | } | ||
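As a worked example with made-up addresses: if the constant buffer descriptor holds the base address 0x20000000 and the traced guest address resolves to 0x20000040, the storage offset is 0x40 bytes:

    static_assert(0x20000040u - 0x20000000u == 0x40u);  // offset = guest address - buffer base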
| 388 | |||
| 389 | /// Replace a global memory load instruction with its storage buffer equivalent | ||
| 390 | void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 391 | const IR::U32& offset) { | ||
| 392 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 393 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 394 | const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})}; | ||
| 395 | inst.ReplaceUsesWith(value); | ||
| 396 | } | ||
| 397 | |||
| 398 | /// Replace a global memory write instruction with its storage buffer equivalent | ||
| 399 | void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 400 | const IR::U32& offset) { | ||
| 401 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 402 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 403 | block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)}); | ||
| 404 | inst.Invalidate(); | ||
| 405 | } | ||
| 406 | |||
| 407 | /// Replace an atomic operation on global memory instruction with its storage buffer equivalent | ||
| 408 | void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 409 | const IR::U32& offset) { | ||
| 410 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 411 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 412 | const IR::Value value{ | ||
| 413 | &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; | ||
| 414 | inst.ReplaceUsesWith(value); | ||
| 415 | } | ||
| 416 | |||
| 417 | /// Replace a global memory instruction with its storage buffer equivalent | ||
| 418 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 419 | const IR::U32& offset) { | ||
| 420 | switch (inst.GetOpcode()) { | ||
| 421 | case IR::Opcode::LoadGlobalS8: | ||
| 422 | case IR::Opcode::LoadGlobalU8: | ||
| 423 | case IR::Opcode::LoadGlobalS16: | ||
| 424 | case IR::Opcode::LoadGlobalU16: | ||
| 425 | case IR::Opcode::LoadGlobal32: | ||
| 426 | case IR::Opcode::LoadGlobal64: | ||
| 427 | case IR::Opcode::LoadGlobal128: | ||
| 428 | return ReplaceLoad(block, inst, storage_index, offset); | ||
| 429 | case IR::Opcode::WriteGlobalS8: | ||
| 430 | case IR::Opcode::WriteGlobalU8: | ||
| 431 | case IR::Opcode::WriteGlobalS16: | ||
| 432 | case IR::Opcode::WriteGlobalU16: | ||
| 433 | case IR::Opcode::WriteGlobal32: | ||
| 434 | case IR::Opcode::WriteGlobal64: | ||
| 435 | case IR::Opcode::WriteGlobal128: | ||
| 436 | return ReplaceWrite(block, inst, storage_index, offset); | ||
| 437 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 438 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 439 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 440 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 441 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 442 | case IR::Opcode::GlobalAtomicInc32: | ||
| 443 | case IR::Opcode::GlobalAtomicDec32: | ||
| 444 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 445 | case IR::Opcode::GlobalAtomicOr32: | ||
| 446 | case IR::Opcode::GlobalAtomicXor32: | ||
| 447 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 448 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 449 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 450 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 451 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 452 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 453 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 454 | case IR::Opcode::GlobalAtomicOr64: | ||
| 455 | case IR::Opcode::GlobalAtomicXor64: | ||
| 456 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 457 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 458 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 459 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 460 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 461 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 462 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 463 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 464 | return ReplaceAtomic(block, inst, storage_index, offset); | ||
| 465 | default: | ||
| 466 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); | ||
| 467 | } | ||
| 468 | } | ||
| 469 | } // Anonymous namespace | ||
| 470 | |||
| 471 | void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||
| 472 | StorageInfo info; | ||
| 473 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 474 | for (IR::Inst& inst : block->Instructions()) { | ||
| 475 | if (!IsGlobalMemory(inst)) { | ||
| 476 | continue; | ||
| 477 | } | ||
| 478 | CollectStorageBuffers(*block, inst, info); | ||
| 479 | } | ||
| 480 | } | ||
| 481 | for (const StorageBufferAddr& storage_buffer : info.set) { | ||
| 482 | program.info.storage_buffers_descriptors.push_back({ | ||
| 483 | .cbuf_index = storage_buffer.index, | ||
| 484 | .cbuf_offset = storage_buffer.offset, | ||
| 485 | .count = 1, | ||
| 486 | .is_written = info.writes.contains(storage_buffer), | ||
| 487 | }); | ||
| 488 | } | ||
| 489 | for (const StorageInst& storage_inst : info.to_replace) { | ||
| 490 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | ||
| 491 | const auto it{info.set.find(storage_inst.storage_buffer)}; | ||
| 492 | const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; | ||
| 493 | IR::Block* const block{storage_inst.block}; | ||
| 494 | IR::Inst* const inst{storage_inst.inst}; | ||
| 495 | const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; | ||
| 496 | Replace(*block, *inst, index, offset); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | |||
| 500 | template <typename Descriptors, typename Descriptor, typename Func> | ||
| 501 | static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||
| 502 | // TODO: Handle arrays | ||
| 503 | const auto it{std::ranges::find_if(descriptors, pred)}; | ||
| 504 | if (it != descriptors.end()) { | ||
| 505 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 506 | } | ||
| 507 | descriptors.push_back(desc); | ||
| 508 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 509 | } | ||
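A hypothetical call site for Add, deduplicating a descriptor by its constant buffer coordinates (the names below are illustrative, not taken from this patch):

    // Illustrative only: dedupe `desc` against an existing descriptor list
    const u32 index{Add(descriptors, desc, [&desc](const auto& existing) {
        return desc.cbuf_index == existing.cbuf_index && desc.cbuf_offset == existing.cbuf_offset;
    })};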
| 510 | |||
| 511 | void JoinStorageInfo(Info& base, Info& source) { | ||
| 512 | auto& descriptors = base.storage_buffers_descriptors; | ||
| 513 | for (auto& desc : source.storage_buffers_descriptors) { | ||
| 514 | auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { | ||
| 515 | return desc.cbuf_index == existing.cbuf_index && | ||
| 516 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; | ||
| 517 | })}; | ||
| 518 | if (it != descriptors.end()) { | ||
| 519 | it->is_written |= desc.is_written; | ||
| 520 | continue; | ||
| 521 | } | ||
| 522 | descriptors.push_back(desc); | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 526 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/identity_removal_pass.cpp b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp new file mode 100644 index 000000000..e9b55f835 --- /dev/null +++ b/src/shader_recompiler/ir_opt/identity_removal_pass.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 10 | |||
| 11 | namespace Shader::Optimization { | ||
| 12 | |||
| 13 | void IdentityRemovalPass(IR::Program& program) { | ||
| 14 | std::vector<IR::Inst*> to_invalidate; | ||
| 15 | for (IR::Block* const block : program.blocks) { | ||
| 16 | for (auto inst = block->begin(); inst != block->end();) { | ||
| 17 | const size_t num_args{inst->NumArgs()}; | ||
| 18 | for (size_t i = 0; i < num_args; ++i) { | ||
| 19 | IR::Value arg; | ||
| 20 | while ((arg = inst->Arg(i)).IsIdentity()) { | ||
| 21 | inst->SetArg(i, arg.Inst()->Arg(0)); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (inst->GetOpcode() == IR::Opcode::Identity || | ||
| 25 | inst->GetOpcode() == IR::Opcode::Void) { | ||
| 26 | to_invalidate.push_back(&*inst); | ||
| 27 | inst = block->Instructions().erase(inst); | ||
| 28 | } else { | ||
| 29 | ++inst; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | for (IR::Inst* const inst : to_invalidate) { | ||
| 34 | inst->Invalidate(); | ||
| 35 | } | ||
| 36 | } | ||
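The effect, in the %-notation used in the other passes (sketch):

    %1 = IAdd32 %a, %b
    %2 = Identity %1
    %3 = IMul32 %2, #2     ; rewritten in place to  %3 = IMul32 %1, #2
                           ; %2 is then erased and invalidated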
| 37 | |||
| 38 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..773e1f961 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 10 | |||
| 11 | namespace Shader::Optimization { | ||
| 12 | namespace { | ||
| 13 | IR::Opcode Replace(IR::Opcode op) { | ||
| 14 | switch (op) { | ||
| 15 | case IR::Opcode::FPAbs16: | ||
| 16 | return IR::Opcode::FPAbs32; | ||
| 17 | case IR::Opcode::FPAdd16: | ||
| 18 | return IR::Opcode::FPAdd32; | ||
| 19 | case IR::Opcode::FPCeil16: | ||
| 20 | return IR::Opcode::FPCeil32; | ||
| 21 | case IR::Opcode::FPFloor16: | ||
| 22 | return IR::Opcode::FPFloor32; | ||
| 23 | case IR::Opcode::FPFma16: | ||
| 24 | return IR::Opcode::FPFma32; | ||
| 25 | case IR::Opcode::FPMul16: | ||
| 26 | return IR::Opcode::FPMul32; | ||
| 27 | case IR::Opcode::FPNeg16: | ||
| 28 | return IR::Opcode::FPNeg32; | ||
| 29 | case IR::Opcode::FPRoundEven16: | ||
| 30 | return IR::Opcode::FPRoundEven32; | ||
| 31 | case IR::Opcode::FPSaturate16: | ||
| 32 | return IR::Opcode::FPSaturate32; | ||
| 33 | case IR::Opcode::FPClamp16: | ||
| 34 | return IR::Opcode::FPClamp32; | ||
| 35 | case IR::Opcode::FPTrunc16: | ||
| 36 | return IR::Opcode::FPTrunc32; | ||
| 37 | case IR::Opcode::CompositeConstructF16x2: | ||
| 38 | return IR::Opcode::CompositeConstructF32x2; | ||
| 39 | case IR::Opcode::CompositeConstructF16x3: | ||
| 40 | return IR::Opcode::CompositeConstructF32x3; | ||
| 41 | case IR::Opcode::CompositeConstructF16x4: | ||
| 42 | return IR::Opcode::CompositeConstructF32x4; | ||
| 43 | case IR::Opcode::CompositeExtractF16x2: | ||
| 44 | return IR::Opcode::CompositeExtractF32x2; | ||
| 45 | case IR::Opcode::CompositeExtractF16x3: | ||
| 46 | return IR::Opcode::CompositeExtractF32x3; | ||
| 47 | case IR::Opcode::CompositeExtractF16x4: | ||
| 48 | return IR::Opcode::CompositeExtractF32x4; | ||
| 49 | case IR::Opcode::CompositeInsertF16x2: | ||
| 50 | return IR::Opcode::CompositeInsertF32x2; | ||
| 51 | case IR::Opcode::CompositeInsertF16x3: | ||
| 52 | return IR::Opcode::CompositeInsertF32x3; | ||
| 53 | case IR::Opcode::CompositeInsertF16x4: | ||
| 54 | return IR::Opcode::CompositeInsertF32x4; | ||
| 55 | case IR::Opcode::FPOrdEqual16: | ||
| 56 | return IR::Opcode::FPOrdEqual32; | ||
| 57 | case IR::Opcode::FPUnordEqual16: | ||
| 58 | return IR::Opcode::FPUnordEqual32; | ||
| 59 | case IR::Opcode::FPOrdNotEqual16: | ||
| 60 | return IR::Opcode::FPOrdNotEqual32; | ||
| 61 | case IR::Opcode::FPUnordNotEqual16: | ||
| 62 | return IR::Opcode::FPUnordNotEqual32; | ||
| 63 | case IR::Opcode::FPOrdLessThan16: | ||
| 64 | return IR::Opcode::FPOrdLessThan32; | ||
| 65 | case IR::Opcode::FPUnordLessThan16: | ||
| 66 | return IR::Opcode::FPUnordLessThan32; | ||
| 67 | case IR::Opcode::FPOrdGreaterThan16: | ||
| 68 | return IR::Opcode::FPOrdGreaterThan32; | ||
| 69 | case IR::Opcode::FPUnordGreaterThan16: | ||
| 70 | return IR::Opcode::FPUnordGreaterThan32; | ||
| 71 | case IR::Opcode::FPOrdLessThanEqual16: | ||
| 72 | return IR::Opcode::FPOrdLessThanEqual32; | ||
| 73 | case IR::Opcode::FPUnordLessThanEqual16: | ||
| 74 | return IR::Opcode::FPUnordLessThanEqual32; | ||
| 75 | case IR::Opcode::FPOrdGreaterThanEqual16: | ||
| 76 | return IR::Opcode::FPOrdGreaterThanEqual32; | ||
| 77 | case IR::Opcode::FPUnordGreaterThanEqual16: | ||
| 78 | return IR::Opcode::FPUnordGreaterThanEqual32; | ||
| 79 | case IR::Opcode::FPIsNan16: | ||
| 80 | return IR::Opcode::FPIsNan32; | ||
| 81 | case IR::Opcode::ConvertS16F16: | ||
| 82 | return IR::Opcode::ConvertS16F32; | ||
| 83 | case IR::Opcode::ConvertS32F16: | ||
| 84 | return IR::Opcode::ConvertS32F32; | ||
| 85 | case IR::Opcode::ConvertS64F16: | ||
| 86 | return IR::Opcode::ConvertS64F32; | ||
| 87 | case IR::Opcode::ConvertU16F16: | ||
| 88 | return IR::Opcode::ConvertU16F32; | ||
| 89 | case IR::Opcode::ConvertU32F16: | ||
| 90 | return IR::Opcode::ConvertU32F32; | ||
| 91 | case IR::Opcode::ConvertU64F16: | ||
| 92 | return IR::Opcode::ConvertU64F32; | ||
| 93 | case IR::Opcode::PackFloat2x16: | ||
| 94 | return IR::Opcode::PackHalf2x16; | ||
| 95 | case IR::Opcode::UnpackFloat2x16: | ||
| 96 | return IR::Opcode::UnpackHalf2x16; | ||
| 97 | case IR::Opcode::ConvertF32F16: | ||
| 98 | return IR::Opcode::Identity; | ||
| 99 | case IR::Opcode::ConvertF16F32: | ||
| 100 | return IR::Opcode::Identity; | ||
| 101 | case IR::Opcode::ConvertF16S8: | ||
| 102 | return IR::Opcode::ConvertF32S8; | ||
| 103 | case IR::Opcode::ConvertF16S16: | ||
| 104 | return IR::Opcode::ConvertF32S16; | ||
| 105 | case IR::Opcode::ConvertF16S32: | ||
| 106 | return IR::Opcode::ConvertF32S32; | ||
| 107 | case IR::Opcode::ConvertF16S64: | ||
| 108 | return IR::Opcode::ConvertF32S64; | ||
| 109 | case IR::Opcode::ConvertF16U8: | ||
| 110 | return IR::Opcode::ConvertF32U8; | ||
| 111 | case IR::Opcode::ConvertF16U16: | ||
| 112 | return IR::Opcode::ConvertF32U16; | ||
| 113 | case IR::Opcode::ConvertF16U32: | ||
| 114 | return IR::Opcode::ConvertF32U32; | ||
| 115 | case IR::Opcode::ConvertF16U64: | ||
| 116 | return IR::Opcode::ConvertF32U64; | ||
| 117 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 118 | return IR::Opcode::GlobalAtomicAddF32x2; | ||
| 119 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 120 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 121 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 122 | return IR::Opcode::GlobalAtomicMinF32x2; | ||
| 123 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 124 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 125 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 126 | return IR::Opcode::GlobalAtomicMaxF32x2; | ||
| 127 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 128 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 129 | default: | ||
| 130 | return op; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | } // Anonymous namespace | ||
| 134 | |||
| 135 | void LowerFp16ToFp32(IR::Program& program) { | ||
| 136 | for (IR::Block* const block : program.blocks) { | ||
| 137 | for (IR::Inst& inst : block->Instructions()) { | ||
| 138 | inst.ReplaceOpcode(Replace(inst.GetOpcode())); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | } | ||
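Note that ConvertF32F16 and ConvertF16F32 both map to Identity, so this pass relies on IdentityRemovalPass running afterwards to splice the lowered values through. A before/after sketch:

    %sum16 = FPAdd16 %a, %b          =>  %sum32 = FPAdd32 %a, %b
    %f32   = ConvertF32F16 %sum16    =>  %f32   = Identity %sum32   ; removed later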
| 142 | |||
| 143 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp new file mode 100644 index 000000000..e80d3d1d9 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 12 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 13 | |||
| 14 | namespace Shader::Optimization { | ||
| 15 | namespace { | ||
| 16 | std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) { | ||
| 17 | if (packed.IsImmediate()) { | ||
| 18 | const u64 value{packed.U64()}; | ||
| 19 | return { | ||
| 20 | ir.Imm32(static_cast<u32>(value)), | ||
| 21 | ir.Imm32(static_cast<u32>(value >> 32)), | ||
| 22 | }; | ||
| 23 | } else { | ||
| 24 | return std::pair<IR::U32, IR::U32>{ | ||
| 25 | ir.CompositeExtract(packed, 0u), | ||
| 26 | ir.CompositeExtract(packed, 1u), | ||
| 27 | }; | ||
| 28 | } | ||
| 29 | } | ||
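For an immediate, Unpack simply splits the 64-bit constant into its two halves (editor's check):

    #include <cstdint>

    static_assert(static_cast<uint32_t>(0x1122334455667788ULL) == 0x55667788u);        // low half
    static_assert(static_cast<uint32_t>(0x1122334455667788ULL >> 32) == 0x11223344u);  // high half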
| 30 | |||
| 31 | void IAdd64To32(IR::Block& block, IR::Inst& inst) { | ||
| 32 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 33 | throw NotImplementedException("IAdd64 emulation with pseudo instructions"); | ||
| 34 | } | ||
| 35 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 36 | const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; | ||
| 37 | const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; | ||
| 38 | |||
| 39 | const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)}; | ||
| 40 | const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 41 | |||
| 42 | const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)}; | ||
| 43 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void ISub64To32(IR::Block& block, IR::Inst& inst) { | ||
| 47 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 48 | throw NotImplementedException("ISub64 emulation with pseudo instructions"); | ||
| 49 | } | ||
| 50 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 51 | const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))}; | ||
| 52 | const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))}; | ||
| 53 | |||
| 54 | const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)}; | ||
| 55 | const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)}; | ||
| 56 | const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 57 | |||
| 58 | const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)}; | ||
| 59 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 60 | } | ||
| 61 | |||
| 62 | void INeg64To32(IR::Block& block, IR::Inst& inst) { | ||
| 63 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 64 | throw NotImplementedException("INeg64 emulation with pseudo instructions"); | ||
| 65 | } | ||
| 66 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 67 | auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 68 | lo = ir.BitwiseNot(lo); | ||
| 69 | hi = ir.BitwiseNot(hi); | ||
| 70 | |||
| 71 | lo = ir.IAdd(lo, ir.Imm32(1)); | ||
| 72 | |||
| 73 | const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))}; | ||
| 74 | hi = ir.IAdd(hi, carry); | ||
| 75 | |||
| 76 | inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi)); | ||
| 77 | } | ||
| 78 | |||
| 79 | void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) { | ||
| 80 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 81 | throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions"); | ||
| 82 | } | ||
| 83 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 84 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 85 | const IR::U32 shift{inst.Arg(1)}; | ||
| 86 | |||
| 87 | const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)}; | ||
| 88 | const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)}; | ||
| 89 | |||
| 90 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 91 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 92 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 93 | |||
| 94 | const IR::U32 long_ret_lo{ir.Imm32(0)}; | ||
| 95 | const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)}; | ||
| 96 | |||
| 97 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 98 | const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)}; | ||
| 99 | const IR::U32 short_ret_lo{shifted_lo}; | ||
| 100 | const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)}; | ||
| 101 | |||
| 102 | const IR::U32 zero_ret_lo{lo}; | ||
| 103 | const IR::U32 zero_ret_hi{hi}; | ||
| 104 | |||
| 105 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 106 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 107 | |||
| 108 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 109 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 110 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 111 | } | ||
| 112 | |||
| 113 | void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) { | ||
| 114 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 115 | throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions"); | ||
| 116 | } | ||
| 117 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 118 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 119 | const IR::U32 shift{inst.Arg(1)}; | ||
| 120 | |||
| 121 | const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; | ||
| 122 | const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)}; | ||
| 123 | |||
| 124 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 125 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 126 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 127 | |||
| 128 | const IR::U32 long_ret_hi{ir.Imm32(0)}; | ||
| 129 | const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)}; | ||
| 130 | |||
| 131 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 132 | const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; | ||
| 133 | const IR::U32 short_ret_hi{shifted_hi}; | ||
| 134 | const IR::U32 short_ret_lo{ | ||
| 135 | ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; | ||
| 136 | |||
| 137 | const IR::U32 zero_ret_lo{lo}; | ||
| 138 | const IR::U32 zero_ret_hi{hi}; | ||
| 139 | |||
| 140 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 141 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 142 | |||
| 143 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 144 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 145 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 146 | } | ||
| 147 | |||
| 148 | void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) { | ||
| 149 | if (inst.HasAssociatedPseudoOperation()) { | ||
| 150 | throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions"); | ||
| 151 | } | ||
| 152 | IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst)); | ||
| 153 | const auto [lo, hi]{Unpack(ir, inst.Arg(0))}; | ||
| 154 | const IR::U32 shift{inst.Arg(1)}; | ||
| 155 | |||
| 156 | const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)}; | ||
| 157 | const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)}; | ||
| 158 | |||
| 159 | const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))}; | ||
| 160 | |||
| 161 | const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))}; | ||
| 162 | const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)}; | ||
| 163 | const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))}; | ||
| 164 | |||
| 165 | const IR::U32 long_ret_hi{sign_extension}; | ||
| 166 | const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)}; | ||
| 167 | |||
| 168 | const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)}; | ||
| 169 | const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)}; | ||
| 170 | const IR::U32 short_ret_hi{shifted_hi}; | ||
| 171 | const IR::U32 short_ret_lo{ | ||
| 172 | ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)}; | ||
| 173 | |||
| 174 | const IR::U32 zero_ret_lo{lo}; | ||
| 175 | const IR::U32 zero_ret_hi{hi}; | ||
| 176 | |||
| 177 | const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)}; | ||
| 178 | const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)}; | ||
| 179 | |||
| 180 | const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)}; | ||
| 181 | const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)}; | ||
| 182 | inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi)); | ||
| 183 | } | ||
| 184 | |||
| 185 | void Lower(IR::Block& block, IR::Inst& inst) { | ||
| 186 | switch (inst.GetOpcode()) { | ||
| 187 | case IR::Opcode::PackUint2x32: | ||
| 188 | case IR::Opcode::UnpackUint2x32: | ||
| 189 | return inst.ReplaceOpcode(IR::Opcode::Identity); | ||
| 190 | case IR::Opcode::IAdd64: | ||
| 191 | return IAdd64To32(block, inst); | ||
| 192 | case IR::Opcode::ISub64: | ||
| 193 | return ISub64To32(block, inst); | ||
| 194 | case IR::Opcode::INeg64: | ||
| 195 | return INeg64To32(block, inst); | ||
| 196 | case IR::Opcode::ShiftLeftLogical64: | ||
| 197 | return ShiftLeftLogical64To32(block, inst); | ||
| 198 | case IR::Opcode::ShiftRightLogical64: | ||
| 199 | return ShiftRightLogical64To32(block, inst); | ||
| 200 | case IR::Opcode::ShiftRightArithmetic64: | ||
| 201 | return ShiftRightArithmetic64To32(block, inst); | ||
| 202 | default: | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | void LowerInt64ToInt32(IR::Program& program) { | ||
| 209 | const auto end{program.post_order_blocks.rend()}; | ||
| 210 | for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||
| 211 | IR::Block* const block{*it}; | ||
| 212 | for (IR::Inst& inst : block->Instructions()) { | ||
| 213 | Lower(*block, inst); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Optimization | ||
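
The add and subtract lowerings above rely on the standard wide-arithmetic identities: an unsigned low-word sum carries out exactly when the result is smaller than either addend, and a low-word difference borrows exactly when the result is larger than the minuend, which is what the emitted GetCarryFromOp and IGreaterThan checks encode. A host-side sanity check of those identities, written in plain C++ rather than the IR:

```cpp
#include <cassert>
#include <cstdint>

using u32 = std::uint32_t;
using u64 = std::uint64_t;

u64 Add64Via32(u64 a, u64 b) {
    const u32 a_lo = static_cast<u32>(a), a_hi = static_cast<u32>(a >> 32);
    const u32 b_lo = static_cast<u32>(b), b_hi = static_cast<u32>(b >> 32);
    const u32 lo = a_lo + b_lo;
    const u32 carry = lo < a_lo ? 1u : 0u; // the condition GetCarryFromOp models
    const u32 hi = a_hi + b_hi + carry;
    return (static_cast<u64>(hi) << 32) | lo;
}

u64 Sub64Via32(u64 a, u64 b) {
    const u32 a_lo = static_cast<u32>(a), a_hi = static_cast<u32>(a >> 32);
    const u32 b_lo = static_cast<u32>(b), b_hi = static_cast<u32>(b >> 32);
    const u32 lo = a_lo - b_lo;
    const u32 borrow = lo > a_lo ? 1u : 0u; // mirrors IGreaterThan(ret_lo, a_lo, false)
    const u32 hi = a_hi - b_hi - borrow;
    return (static_cast<u64>(hi) << 32) | lo;
}

int main() {
    const u64 values[]{0, 1, 0xFFFF'FFFF, 0x8000'0000'0000'0000, 0xFFFF'FFFF'FFFF'FFFF};
    for (const u64 a : values) {
        for (const u64 b : values) {
            assert(Add64Via32(a, b) == a + b);
            assert(Sub64Via32(a, b) == a - b);
        }
    }
}
```

The shift lowerings follow the same shape, with a three-way select covering the zero, short (less than 32) and long (32 or more) shift ranges.
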
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h new file mode 100644 index 000000000..2f89b1ea0 --- /dev/null +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include "shader_recompiler/environment.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | |||
| 15 | void CollectShaderInfoPass(Environment& env, IR::Program& program); | ||
| 16 | void ConstantPropagationPass(IR::Program& program); | ||
| 17 | void DeadCodeEliminationPass(IR::Program& program); | ||
| 18 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | ||
| 19 | void IdentityRemovalPass(IR::Program& program); | ||
| 20 | void LowerFp16ToFp32(IR::Program& program); | ||
| 21 | void LowerInt64ToInt32(IR::Program& program); | ||
| 22 | void SsaRewritePass(IR::Program& program); | ||
| 23 | void TexturePass(Environment& env, IR::Program& program); | ||
| 24 | void VerificationPass(const IR::Program& program); | ||
| 25 | |||
| 26 | // Dual Vertex | ||
| 27 | void VertexATransformPass(IR::Program& program); | ||
| 28 | void VertexBTransformPass(IR::Program& program); | ||
| 29 | void JoinTextureInfo(Info& base, Info& source); | ||
| 30 | void JoinStorageInfo(Info& base, Info& source); | ||
| 31 | |||
| 32 | } // namespace Shader::Optimization | ||
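
Nothing in this header dictates an order, so the following is a hypothetical driver showing how a caller might sequence these passes; the real ordering lives in the Maxwell frontend, and the int64 lowering in particular is only needed when the host profile lacks 64-bit integer support. Treat this as an illustrative sketch, not the actual pipeline.

```cpp
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/ir_opt/passes.h"
#include "shader_recompiler/profile.h"

namespace Shader {

void RunOptimizationPasses(Environment& env, IR::Program& program, const Profile& profile) {
    Optimization::SsaRewritePass(program); // leave Get/Set form before anything else
    Optimization::GlobalMemoryToStorageBufferPass(program);
    Optimization::TexturePass(env, program);
    if (!profile.support_int64) {
        Optimization::LowerInt64ToInt32(program); // only when the host lacks int64
    }
    Optimization::ConstantPropagationPass(program);
    Optimization::DeadCodeEliminationPass(program);
    Optimization::IdentityRemovalPass(program);
    Optimization::VerificationPass(program); // sanity checks, cheap enough for bring-up
    Optimization::CollectShaderInfoPass(env, program);
}

} // namespace Shader
```
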
diff --git a/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp new file mode 100644 index 000000000..53145fb5e --- /dev/null +++ b/src/shader_recompiler/ir_opt/ssa_rewrite_pass.cpp | |||
| @@ -0,0 +1,383 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | // This file implements the SSA rewriting algorithm proposed in | ||
| 6 | // | ||
| 7 | // Simple and Efficient Construction of Static Single Assignment Form. | ||
| 8 | // Braun M., Buchwald S., Hack S., Leißa R., Mallon C., Zwinkau A. (2013) | ||
| 9 | // In: Jhala R., De Bosschere K. (eds) | ||
| 10 | // Compiler Construction. CC 2013. | ||
| 11 | // Lecture Notes in Computer Science, vol 7791. | ||
| 12 | // Springer, Berlin, Heidelberg | ||
| 13 | // | ||
| 14 | // https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 | ||
| 15 | // | ||
| 16 | |||
| 17 | #include <span> | ||
| 18 | #include <variant> | ||
| 19 | #include <vector> | ||
| 20 | |||
| 21 | #include <boost/container/flat_map.hpp> | ||
| 22 | #include <boost/container/small_vector.hpp> | ||
| 23 | |||
| 24 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 25 | #include "shader_recompiler/frontend/ir/opcodes.h" | ||
| 26 | #include "shader_recompiler/frontend/ir/pred.h" | ||
| 27 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 28 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 29 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 30 | |||
| 31 | namespace Shader::Optimization { | ||
| 32 | namespace { | ||
| 33 | struct FlagTag { | ||
| 34 | auto operator<=>(const FlagTag&) const noexcept = default; | ||
| 35 | }; | ||
| 36 | struct ZeroFlagTag : FlagTag {}; | ||
| 37 | struct SignFlagTag : FlagTag {}; | ||
| 38 | struct CarryFlagTag : FlagTag {}; | ||
| 39 | struct OverflowFlagTag : FlagTag {}; | ||
| 40 | |||
| 41 | struct GotoVariable : FlagTag { | ||
| 42 | GotoVariable() = default; | ||
| 43 | explicit GotoVariable(u32 index_) : index{index_} {} | ||
| 44 | |||
| 45 | auto operator<=>(const GotoVariable&) const noexcept = default; | ||
| 46 | |||
| 47 | u32 index; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct IndirectBranchVariable { | ||
| 51 | auto operator<=>(const IndirectBranchVariable&) const noexcept = default; | ||
| 52 | }; | ||
| 53 | |||
| 54 | using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, | ||
| 55 | OverflowFlagTag, GotoVariable, IndirectBranchVariable>; | ||
| 56 | using ValueMap = boost::container::flat_map<IR::Block*, IR::Value>; | ||
| 57 | |||
| 58 | struct DefTable { | ||
| 59 | const IR::Value& Def(IR::Block* block, IR::Reg variable) { | ||
| 60 | return block->SsaRegValue(variable); | ||
| 61 | } | ||
| 62 | void SetDef(IR::Block* block, IR::Reg variable, const IR::Value& value) { | ||
| 63 | block->SetSsaRegValue(variable, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | const IR::Value& Def(IR::Block* block, IR::Pred variable) { | ||
| 67 | return preds[IR::PredIndex(variable)][block]; | ||
| 68 | } | ||
| 69 | void SetDef(IR::Block* block, IR::Pred variable, const IR::Value& value) { | ||
| 70 | preds[IR::PredIndex(variable)].insert_or_assign(block, value); | ||
| 71 | } | ||
| 72 | |||
| 73 | const IR::Value& Def(IR::Block* block, GotoVariable variable) { | ||
| 74 | return goto_vars[variable.index][block]; | ||
| 75 | } | ||
| 76 | void SetDef(IR::Block* block, GotoVariable variable, const IR::Value& value) { | ||
| 77 | goto_vars[variable.index].insert_or_assign(block, value); | ||
| 78 | } | ||
| 79 | |||
| 80 | const IR::Value& Def(IR::Block* block, IndirectBranchVariable) { | ||
| 81 | return indirect_branch_var[block]; | ||
| 82 | } | ||
| 83 | void SetDef(IR::Block* block, IndirectBranchVariable, const IR::Value& value) { | ||
| 84 | indirect_branch_var.insert_or_assign(block, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | const IR::Value& Def(IR::Block* block, ZeroFlagTag) { | ||
| 88 | return zero_flag[block]; | ||
| 89 | } | ||
| 90 | void SetDef(IR::Block* block, ZeroFlagTag, const IR::Value& value) { | ||
| 91 | zero_flag.insert_or_assign(block, value); | ||
| 92 | } | ||
| 93 | |||
| 94 | const IR::Value& Def(IR::Block* block, SignFlagTag) { | ||
| 95 | return sign_flag[block]; | ||
| 96 | } | ||
| 97 | void SetDef(IR::Block* block, SignFlagTag, const IR::Value& value) { | ||
| 98 | sign_flag.insert_or_assign(block, value); | ||
| 99 | } | ||
| 100 | |||
| 101 | const IR::Value& Def(IR::Block* block, CarryFlagTag) { | ||
| 102 | return carry_flag[block]; | ||
| 103 | } | ||
| 104 | void SetDef(IR::Block* block, CarryFlagTag, const IR::Value& value) { | ||
| 105 | carry_flag.insert_or_assign(block, value); | ||
| 106 | } | ||
| 107 | |||
| 108 | const IR::Value& Def(IR::Block* block, OverflowFlagTag) { | ||
| 109 | return overflow_flag[block]; | ||
| 110 | } | ||
| 111 | void SetDef(IR::Block* block, OverflowFlagTag, const IR::Value& value) { | ||
| 112 | overflow_flag.insert_or_assign(block, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | std::array<ValueMap, IR::NUM_USER_PREDS> preds; | ||
| 116 | boost::container::flat_map<u32, ValueMap> goto_vars; | ||
| 117 | ValueMap indirect_branch_var; | ||
| 118 | ValueMap zero_flag; | ||
| 119 | ValueMap sign_flag; | ||
| 120 | ValueMap carry_flag; | ||
| 121 | ValueMap overflow_flag; | ||
| 122 | }; | ||
| 123 | |||
| 124 | IR::Opcode UndefOpcode(IR::Reg) noexcept { | ||
| 125 | return IR::Opcode::UndefU32; | ||
| 126 | } | ||
| 127 | |||
| 128 | IR::Opcode UndefOpcode(IR::Pred) noexcept { | ||
| 129 | return IR::Opcode::UndefU1; | ||
| 130 | } | ||
| 131 | |||
| 132 | IR::Opcode UndefOpcode(const FlagTag&) noexcept { | ||
| 133 | return IR::Opcode::UndefU1; | ||
| 134 | } | ||
| 135 | |||
| 136 | IR::Opcode UndefOpcode(IndirectBranchVariable) noexcept { | ||
| 137 | return IR::Opcode::UndefU32; | ||
| 138 | } | ||
| 139 | |||
| 140 | enum class Status { | ||
| 141 | Start, | ||
| 142 | SetValue, | ||
| 143 | PreparePhiArgument, | ||
| 144 | PushPhiArgument, | ||
| 145 | }; | ||
| 146 | |||
| 147 | template <typename Type> | ||
| 148 | struct ReadState { | ||
| 149 | ReadState(IR::Block* block_) : block{block_} {} | ||
| 150 | ReadState() = default; | ||
| 151 | |||
| 152 | IR::Block* block{}; | ||
| 153 | IR::Value result{}; | ||
| 154 | IR::Inst* phi{}; | ||
| 155 | IR::Block* const* pred_it{}; | ||
| 156 | IR::Block* const* pred_end{}; | ||
| 157 | Status pc{Status::Start}; | ||
| 158 | }; | ||
| 159 | |||
| 160 | class Pass { | ||
| 161 | public: | ||
| 162 | template <typename Type> | ||
| 163 | void WriteVariable(Type variable, IR::Block* block, const IR::Value& value) { | ||
| 164 | current_def.SetDef(block, variable, value); | ||
| 165 | } | ||
| 166 | |||
| 167 | template <typename Type> | ||
| 168 | IR::Value ReadVariable(Type variable, IR::Block* root_block) { | ||
| 169 | boost::container::small_vector<ReadState<Type>, 64> stack{ | ||
| 170 | ReadState<Type>(nullptr), | ||
| 171 | ReadState<Type>(root_block), | ||
| 172 | }; | ||
| 173 | const auto prepare_phi_operand{[&] { | ||
| 174 | if (stack.back().pred_it == stack.back().pred_end) { | ||
| 175 | IR::Inst* const phi{stack.back().phi}; | ||
| 176 | IR::Block* const block{stack.back().block}; | ||
| 177 | const IR::Value result{TryRemoveTrivialPhi(*phi, block, UndefOpcode(variable))}; | ||
| 178 | stack.pop_back(); | ||
| 179 | stack.back().result = result; | ||
| 180 | WriteVariable(variable, block, result); | ||
| 181 | } else { | ||
| 182 | IR::Block* const imm_pred{*stack.back().pred_it}; | ||
| 183 | stack.back().pc = Status::PushPhiArgument; | ||
| 184 | stack.emplace_back(imm_pred); | ||
| 185 | } | ||
| 186 | }}; | ||
| 187 | do { | ||
| 188 | IR::Block* const block{stack.back().block}; | ||
| 189 | switch (stack.back().pc) { | ||
| 190 | case Status::Start: { | ||
| 191 | if (const IR::Value& def = current_def.Def(block, variable); !def.IsEmpty()) { | ||
| 192 | stack.back().result = def; | ||
| 193 | } else if (!block->IsSsaSealed()) { | ||
| 194 | // Incomplete CFG | ||
| 195 | IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||
| 196 | phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); | ||
| 197 | |||
| 198 | incomplete_phis[block].insert_or_assign(variable, phi); | ||
| 199 | stack.back().result = IR::Value{&*phi}; | ||
| 200 | } else if (const std::span imm_preds = block->ImmPredecessors(); | ||
| 201 | imm_preds.size() == 1) { | ||
| 202 | // Optimize the common case of one predecessor: no phi needed | ||
| 203 | stack.back().pc = Status::SetValue; | ||
| 204 | stack.emplace_back(imm_preds.front()); | ||
| 205 | break; | ||
| 206 | } else { | ||
| 207 | // Break potential cycles with operandless phi | ||
| 208 | IR::Inst* const phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||
| 209 | phi->SetFlags(IR::TypeOf(UndefOpcode(variable))); | ||
| 210 | |||
| 211 | WriteVariable(variable, block, IR::Value{phi}); | ||
| 212 | |||
| 213 | stack.back().phi = phi; | ||
| 214 | stack.back().pred_it = imm_preds.data(); | ||
| 215 | stack.back().pred_end = imm_preds.data() + imm_preds.size(); | ||
| 216 | prepare_phi_operand(); | ||
| 217 | break; | ||
| 218 | } | ||
| 219 | } | ||
| 220 | [[fallthrough]]; | ||
| 221 | case Status::SetValue: { | ||
| 222 | const IR::Value result{stack.back().result}; | ||
| 223 | WriteVariable(variable, block, result); | ||
| 224 | stack.pop_back(); | ||
| 225 | stack.back().result = result; | ||
| 226 | break; | ||
| 227 | } | ||
| 228 | case Status::PushPhiArgument: { | ||
| 229 | IR::Inst* const phi{stack.back().phi}; | ||
| 230 | phi->AddPhiOperand(*stack.back().pred_it, stack.back().result); | ||
| 231 | ++stack.back().pred_it; | ||
| 232 | } | ||
| 233 | [[fallthrough]]; | ||
| 234 | case Status::PreparePhiArgument: | ||
| 235 | prepare_phi_operand(); | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | } while (stack.size() > 1); | ||
| 239 | return stack.back().result; | ||
| 240 | } | ||
| 241 | |||
| 242 | void SealBlock(IR::Block* block) { | ||
| 243 | const auto it{incomplete_phis.find(block)}; | ||
| 244 | if (it != incomplete_phis.end()) { | ||
| 245 | for (auto& pair : it->second) { | ||
| 246 | auto& variant{pair.first}; | ||
| 247 | auto& phi{pair.second}; | ||
| 248 | std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); | ||
| 249 | } | ||
| 250 | } | ||
| 251 | block->SsaSeal(); | ||
| 252 | } | ||
| 253 | |||
| 254 | private: | ||
| 255 | template <typename Type> | ||
| 256 | IR::Value AddPhiOperands(Type variable, IR::Inst& phi, IR::Block* block) { | ||
| 257 | for (IR::Block* const imm_pred : block->ImmPredecessors()) { | ||
| 258 | phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); | ||
| 259 | } | ||
| 260 | return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); | ||
| 261 | } | ||
| 262 | |||
| 263 | IR::Value TryRemoveTrivialPhi(IR::Inst& phi, IR::Block* block, IR::Opcode undef_opcode) { | ||
| 264 | IR::Value same; | ||
| 265 | const size_t num_args{phi.NumArgs()}; | ||
| 266 | for (size_t arg_index = 0; arg_index < num_args; ++arg_index) { | ||
| 267 | const IR::Value& op{phi.Arg(arg_index)}; | ||
| 268 | if (op.Resolve() == same.Resolve() || op == IR::Value{&phi}) { | ||
| 269 | // Unique value or self-reference | ||
| 270 | continue; | ||
| 271 | } | ||
| 272 | if (!same.IsEmpty()) { | ||
| 273 | // The phi merges at least two values: not trivial | ||
| 274 | return IR::Value{&phi}; | ||
| 275 | } | ||
| 276 | same = op; | ||
| 277 | } | ||
| 278 | // Remove the phi node from the block; it will be reinserted | ||
| 279 | IR::Block::InstructionList& list{block->Instructions()}; | ||
| 280 | list.erase(IR::Block::InstructionList::s_iterator_to(phi)); | ||
| 281 | |||
| 282 | // Find the first non-phi instruction and use it as an insertion point | ||
| 283 | IR::Block::iterator reinsert_point{std::ranges::find_if_not(list, IR::IsPhi)}; | ||
| 284 | if (same.IsEmpty()) { | ||
| 285 | // The phi is unreachable or in the start block | ||
| 286 | // Insert an undefined instruction and make it the phi node replacement | ||
| 287 | // The "phi" node reinsertion point is specified after this instruction | ||
| 288 | reinsert_point = block->PrependNewInst(reinsert_point, undef_opcode); | ||
| 289 | same = IR::Value{&*reinsert_point}; | ||
| 290 | ++reinsert_point; | ||
| 291 | } | ||
| 292 | // Reinsert the phi node and reroute all its uses to the "same" value | ||
| 293 | list.insert(reinsert_point, phi); | ||
| 294 | phi.ReplaceUsesWith(same); | ||
| 295 | // TODO: Try to recursively remove all phi users, which might have become trivial | ||
| 296 | return same; | ||
| 297 | } | ||
| 298 | |||
| 299 | boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>> | ||
| 300 | incomplete_phis; | ||
| 301 | DefTable current_def; | ||
| 302 | }; | ||
| 303 | |||
| 304 | void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { | ||
| 305 | switch (inst.GetOpcode()) { | ||
| 306 | case IR::Opcode::SetRegister: | ||
| 307 | if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { | ||
| 308 | pass.WriteVariable(reg, block, inst.Arg(1)); | ||
| 309 | } | ||
| 310 | break; | ||
| 311 | case IR::Opcode::SetPred: | ||
| 312 | if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { | ||
| 313 | pass.WriteVariable(pred, block, inst.Arg(1)); | ||
| 314 | } | ||
| 315 | break; | ||
| 316 | case IR::Opcode::SetGotoVariable: | ||
| 317 | pass.WriteVariable(GotoVariable{inst.Arg(0).U32()}, block, inst.Arg(1)); | ||
| 318 | break; | ||
| 319 | case IR::Opcode::SetIndirectBranchVariable: | ||
| 320 | pass.WriteVariable(IndirectBranchVariable{}, block, inst.Arg(0)); | ||
| 321 | break; | ||
| 322 | case IR::Opcode::SetZFlag: | ||
| 323 | pass.WriteVariable(ZeroFlagTag{}, block, inst.Arg(0)); | ||
| 324 | break; | ||
| 325 | case IR::Opcode::SetSFlag: | ||
| 326 | pass.WriteVariable(SignFlagTag{}, block, inst.Arg(0)); | ||
| 327 | break; | ||
| 328 | case IR::Opcode::SetCFlag: | ||
| 329 | pass.WriteVariable(CarryFlagTag{}, block, inst.Arg(0)); | ||
| 330 | break; | ||
| 331 | case IR::Opcode::SetOFlag: | ||
| 332 | pass.WriteVariable(OverflowFlagTag{}, block, inst.Arg(0)); | ||
| 333 | break; | ||
| 334 | case IR::Opcode::GetRegister: | ||
| 335 | if (const IR::Reg reg{inst.Arg(0).Reg()}; reg != IR::Reg::RZ) { | ||
| 336 | inst.ReplaceUsesWith(pass.ReadVariable(reg, block)); | ||
| 337 | } | ||
| 338 | break; | ||
| 339 | case IR::Opcode::GetPred: | ||
| 340 | if (const IR::Pred pred{inst.Arg(0).Pred()}; pred != IR::Pred::PT) { | ||
| 341 | inst.ReplaceUsesWith(pass.ReadVariable(pred, block)); | ||
| 342 | } | ||
| 343 | break; | ||
| 344 | case IR::Opcode::GetGotoVariable: | ||
| 345 | inst.ReplaceUsesWith(pass.ReadVariable(GotoVariable{inst.Arg(0).U32()}, block)); | ||
| 346 | break; | ||
| 347 | case IR::Opcode::GetIndirectBranchVariable: | ||
| 348 | inst.ReplaceUsesWith(pass.ReadVariable(IndirectBranchVariable{}, block)); | ||
| 349 | break; | ||
| 350 | case IR::Opcode::GetZFlag: | ||
| 351 | inst.ReplaceUsesWith(pass.ReadVariable(ZeroFlagTag{}, block)); | ||
| 352 | break; | ||
| 353 | case IR::Opcode::GetSFlag: | ||
| 354 | inst.ReplaceUsesWith(pass.ReadVariable(SignFlagTag{}, block)); | ||
| 355 | break; | ||
| 356 | case IR::Opcode::GetCFlag: | ||
| 357 | inst.ReplaceUsesWith(pass.ReadVariable(CarryFlagTag{}, block)); | ||
| 358 | break; | ||
| 359 | case IR::Opcode::GetOFlag: | ||
| 360 | inst.ReplaceUsesWith(pass.ReadVariable(OverflowFlagTag{}, block)); | ||
| 361 | break; | ||
| 362 | default: | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | |||
| 367 | void VisitBlock(Pass& pass, IR::Block* block) { | ||
| 368 | for (IR::Inst& inst : block->Instructions()) { | ||
| 369 | VisitInst(pass, block, inst); | ||
| 370 | } | ||
| 371 | pass.SealBlock(block); | ||
| 372 | } | ||
| 373 | } // Anonymous namespace | ||
| 374 | |||
| 375 | void SsaRewritePass(IR::Program& program) { | ||
| 376 | Pass pass; | ||
| 377 | const auto end{program.post_order_blocks.rend()}; | ||
| 378 | for (auto block = program.post_order_blocks.rbegin(); block != end; ++block) { | ||
| 379 | VisitBlock(pass, *block); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | } // namespace Shader::Optimization | ||
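
The heart of the algorithm's cleanup step is the phi triviality test in TryRemoveTrivialPhi: a phi whose operands are all the same value, or the phi itself, is replaced by that value (or by an Undef when only self-references remain). A stand-alone model of just that decision logic, using stand-in types rather than the real IR classes:

```cpp
#include <cassert>
#include <vector>

struct Value {
    int id{}; // 0 means "empty", playing the role of IR::Value::IsEmpty()
    bool operator==(const Value&) const = default;
};

struct Phi {
    Value self;
    std::vector<Value> args;
};

// Returns the unique non-self operand; returns phi.self when the phi truly
// merges distinct values; returns an empty Value when only self-references
// exist (the caller would materialize an Undef in that case).
Value TryRemoveTrivialPhi(const Phi& phi) {
    Value same{};
    for (const Value& op : phi.args) {
        if (op == same || op == phi.self) {
            continue; // repeated value or self-reference
        }
        if (same.id != 0) {
            return phi.self; // two distinct incoming values: keep the phi
        }
        same = op;
    }
    return same;
}

int main() {
    const Phi trivial{.self = {1}, .args = {{2}, {2}, {1}}};
    assert(TryRemoveTrivialPhi(trivial) == Value{2});
    const Phi genuine{.self = {1}, .args = {{2}, {3}}};
    assert(TryRemoveTrivialPhi(genuine) == Value{1});
}
```
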
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp new file mode 100644 index 000000000..44ad10d43 --- /dev/null +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -0,0 +1,523 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <bit> | ||
| 7 | #include <optional> | ||
| 8 | |||
| 9 | #include <boost/container/small_vector.hpp> | ||
| 10 | |||
| 11 | #include "shader_recompiler/environment.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 15 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 16 | #include "shader_recompiler/shader_info.h" | ||
| 17 | |||
| 18 | namespace Shader::Optimization { | ||
| 19 | namespace { | ||
| 20 | struct ConstBufferAddr { | ||
| 21 | u32 index; | ||
| 22 | u32 offset; | ||
| 23 | u32 secondary_index; | ||
| 24 | u32 secondary_offset; | ||
| 25 | IR::U32 dynamic_offset; | ||
| 26 | u32 count; | ||
| 27 | bool has_secondary; | ||
| 28 | }; | ||
| 29 | |||
| 30 | struct TextureInst { | ||
| 31 | ConstBufferAddr cbuf; | ||
| 32 | IR::Inst* inst; | ||
| 33 | IR::Block* block; | ||
| 34 | }; | ||
| 35 | |||
| 36 | using TextureInstVector = boost::container::small_vector<TextureInst, 24>; | ||
| 37 | |||
| 38 | constexpr u32 DESCRIPTOR_SIZE = 8; | ||
| 39 | constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE)); | ||
| 40 | |||
| 41 | IR::Opcode IndexedInstruction(const IR::Inst& inst) { | ||
| 42 | switch (inst.GetOpcode()) { | ||
| 43 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 44 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 45 | return IR::Opcode::ImageSampleImplicitLod; | ||
| 46 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 47 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 48 | return IR::Opcode::ImageSampleExplicitLod; | ||
| 49 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 50 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 51 | return IR::Opcode::ImageSampleDrefImplicitLod; | ||
| 52 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 53 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 54 | return IR::Opcode::ImageSampleDrefExplicitLod; | ||
| 55 | case IR::Opcode::BindlessImageGather: | ||
| 56 | case IR::Opcode::BoundImageGather: | ||
| 57 | return IR::Opcode::ImageGather; | ||
| 58 | case IR::Opcode::BindlessImageGatherDref: | ||
| 59 | case IR::Opcode::BoundImageGatherDref: | ||
| 60 | return IR::Opcode::ImageGatherDref; | ||
| 61 | case IR::Opcode::BindlessImageFetch: | ||
| 62 | case IR::Opcode::BoundImageFetch: | ||
| 63 | return IR::Opcode::ImageFetch; | ||
| 64 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 65 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 66 | return IR::Opcode::ImageQueryDimensions; | ||
| 67 | case IR::Opcode::BoundImageQueryLod: | ||
| 68 | case IR::Opcode::BindlessImageQueryLod: | ||
| 69 | return IR::Opcode::ImageQueryLod; | ||
| 70 | case IR::Opcode::BoundImageGradient: | ||
| 71 | case IR::Opcode::BindlessImageGradient: | ||
| 72 | return IR::Opcode::ImageGradient; | ||
| 73 | case IR::Opcode::BoundImageRead: | ||
| 74 | case IR::Opcode::BindlessImageRead: | ||
| 75 | return IR::Opcode::ImageRead; | ||
| 76 | case IR::Opcode::BoundImageWrite: | ||
| 77 | case IR::Opcode::BindlessImageWrite: | ||
| 78 | return IR::Opcode::ImageWrite; | ||
| 79 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 80 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 81 | return IR::Opcode::ImageAtomicIAdd32; | ||
| 82 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 83 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 84 | return IR::Opcode::ImageAtomicSMin32; | ||
| 85 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 86 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 87 | return IR::Opcode::ImageAtomicUMin32; | ||
| 88 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 89 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 90 | return IR::Opcode::ImageAtomicSMax32; | ||
| 91 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 92 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 93 | return IR::Opcode::ImageAtomicUMax32; | ||
| 94 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 95 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 96 | return IR::Opcode::ImageAtomicInc32; | ||
| 97 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 98 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 99 | return IR::Opcode::ImageAtomicDec32; | ||
| 100 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 101 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 102 | return IR::Opcode::ImageAtomicAnd32; | ||
| 103 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 104 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 105 | return IR::Opcode::ImageAtomicOr32; | ||
| 106 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 107 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 108 | return IR::Opcode::ImageAtomicXor32; | ||
| 109 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 110 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 111 | return IR::Opcode::ImageAtomicExchange32; | ||
| 112 | default: | ||
| 113 | return IR::Opcode::Void; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | bool IsBindless(const IR::Inst& inst) { | ||
| 118 | switch (inst.GetOpcode()) { | ||
| 119 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 120 | case IR::Opcode::BindlessImageSampleExplicitLod: | ||
| 121 | case IR::Opcode::BindlessImageSampleDrefImplicitLod: | ||
| 122 | case IR::Opcode::BindlessImageSampleDrefExplicitLod: | ||
| 123 | case IR::Opcode::BindlessImageGather: | ||
| 124 | case IR::Opcode::BindlessImageGatherDref: | ||
| 125 | case IR::Opcode::BindlessImageFetch: | ||
| 126 | case IR::Opcode::BindlessImageQueryDimensions: | ||
| 127 | case IR::Opcode::BindlessImageQueryLod: | ||
| 128 | case IR::Opcode::BindlessImageGradient: | ||
| 129 | case IR::Opcode::BindlessImageRead: | ||
| 130 | case IR::Opcode::BindlessImageWrite: | ||
| 131 | case IR::Opcode::BindlessImageAtomicIAdd32: | ||
| 132 | case IR::Opcode::BindlessImageAtomicSMin32: | ||
| 133 | case IR::Opcode::BindlessImageAtomicUMin32: | ||
| 134 | case IR::Opcode::BindlessImageAtomicSMax32: | ||
| 135 | case IR::Opcode::BindlessImageAtomicUMax32: | ||
| 136 | case IR::Opcode::BindlessImageAtomicInc32: | ||
| 137 | case IR::Opcode::BindlessImageAtomicDec32: | ||
| 138 | case IR::Opcode::BindlessImageAtomicAnd32: | ||
| 139 | case IR::Opcode::BindlessImageAtomicOr32: | ||
| 140 | case IR::Opcode::BindlessImageAtomicXor32: | ||
| 141 | case IR::Opcode::BindlessImageAtomicExchange32: | ||
| 142 | return true; | ||
| 143 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 144 | case IR::Opcode::BoundImageSampleExplicitLod: | ||
| 145 | case IR::Opcode::BoundImageSampleDrefImplicitLod: | ||
| 146 | case IR::Opcode::BoundImageSampleDrefExplicitLod: | ||
| 147 | case IR::Opcode::BoundImageGather: | ||
| 148 | case IR::Opcode::BoundImageGatherDref: | ||
| 149 | case IR::Opcode::BoundImageFetch: | ||
| 150 | case IR::Opcode::BoundImageQueryDimensions: | ||
| 151 | case IR::Opcode::BoundImageQueryLod: | ||
| 152 | case IR::Opcode::BoundImageGradient: | ||
| 153 | case IR::Opcode::BoundImageRead: | ||
| 154 | case IR::Opcode::BoundImageWrite: | ||
| 155 | case IR::Opcode::BoundImageAtomicIAdd32: | ||
| 156 | case IR::Opcode::BoundImageAtomicSMin32: | ||
| 157 | case IR::Opcode::BoundImageAtomicUMin32: | ||
| 158 | case IR::Opcode::BoundImageAtomicSMax32: | ||
| 159 | case IR::Opcode::BoundImageAtomicUMax32: | ||
| 160 | case IR::Opcode::BoundImageAtomicInc32: | ||
| 161 | case IR::Opcode::BoundImageAtomicDec32: | ||
| 162 | case IR::Opcode::BoundImageAtomicAnd32: | ||
| 163 | case IR::Opcode::BoundImageAtomicOr32: | ||
| 164 | case IR::Opcode::BoundImageAtomicXor32: | ||
| 165 | case IR::Opcode::BoundImageAtomicExchange32: | ||
| 166 | return false; | ||
| 167 | default: | ||
| 168 | throw InvalidArgument("Invalid opcode {}", inst.GetOpcode()); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | |||
| 172 | bool IsTextureInstruction(const IR::Inst& inst) { | ||
| 173 | return IndexedInstruction(inst) != IR::Opcode::Void; | ||
| 174 | } | ||
| 175 | |||
| 176 | std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst); | ||
| 177 | |||
| 178 | std::optional<ConstBufferAddr> Track(const IR::Value& value) { | ||
| 179 | return IR::BreadthFirstSearch(value, TryGetConstBuffer); | ||
| 180 | } | ||
| 181 | |||
| 182 | std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst) { | ||
| 183 | switch (inst->GetOpcode()) { | ||
| 184 | default: | ||
| 185 | return std::nullopt; | ||
| 186 | case IR::Opcode::BitwiseOr32: { | ||
| 187 | std::optional lhs{Track(inst->Arg(0))}; | ||
| 188 | std::optional rhs{Track(inst->Arg(1))}; | ||
| 189 | if (!lhs || !rhs) { | ||
| 190 | return std::nullopt; | ||
| 191 | } | ||
| 192 | if (lhs->has_secondary || rhs->has_secondary) { | ||
| 193 | return std::nullopt; | ||
| 194 | } | ||
| 195 | if (lhs->count > 1 || rhs->count > 1) { | ||
| 196 | return std::nullopt; | ||
| 197 | } | ||
| 198 | if (lhs->index > rhs->index || lhs->offset > rhs->offset) { | ||
| 199 | std::swap(lhs, rhs); | ||
| 200 | } | ||
| 201 | return ConstBufferAddr{ | ||
| 202 | .index = lhs->index, | ||
| 203 | .offset = lhs->offset, | ||
| 204 | .secondary_index = rhs->index, | ||
| 205 | .secondary_offset = rhs->offset, | ||
| 206 | .dynamic_offset = {}, | ||
| 207 | .count = 1, | ||
| 208 | .has_secondary = true, | ||
| 209 | }; | ||
| 210 | } | ||
| 211 | case IR::Opcode::GetCbufU32x2: | ||
| 212 | case IR::Opcode::GetCbufU32: | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | const IR::Value index{inst->Arg(0)}; | ||
| 216 | const IR::Value offset{inst->Arg(1)}; | ||
| 217 | if (!index.IsImmediate()) { | ||
| 218 | // Reading a bindless texture from variable indices is valid | ||
| 219 | // but not supported here at the moment | ||
| 220 | return std::nullopt; | ||
| 221 | } | ||
| 222 | if (offset.IsImmediate()) { | ||
| 223 | return ConstBufferAddr{ | ||
| 224 | .index = index.U32(), | ||
| 225 | .offset = offset.U32(), | ||
| 226 | .secondary_index = 0, | ||
| 227 | .secondary_offset = 0, | ||
| 228 | .dynamic_offset = {}, | ||
| 229 | .count = 1, | ||
| 230 | .has_secondary = false, | ||
| 231 | }; | ||
| 232 | } | ||
| 233 | IR::Inst* const offset_inst{offset.InstRecursive()}; | ||
| 234 | if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) { | ||
| 235 | return std::nullopt; | ||
| 236 | } | ||
| 237 | u32 base_offset{}; | ||
| 238 | IR::U32 dynamic_offset; | ||
| 239 | if (offset_inst->Arg(0).IsImmediate()) { | ||
| 240 | base_offset = offset_inst->Arg(0).U32(); | ||
| 241 | dynamic_offset = IR::U32{offset_inst->Arg(1)}; | ||
| 242 | } else if (offset_inst->Arg(1).IsImmediate()) { | ||
| 243 | base_offset = offset_inst->Arg(1).U32(); | ||
| 244 | dynamic_offset = IR::U32{offset_inst->Arg(0)}; | ||
| 245 | } else { | ||
| 246 | return std::nullopt; | ||
| 247 | } | ||
| 248 | return ConstBufferAddr{ | ||
| 249 | .index = index.U32(), | ||
| 250 | .offset = base_offset, | ||
| 251 | .secondary_index = 0, | ||
| 252 | .secondary_offset = 0, | ||
| 253 | .dynamic_offset = dynamic_offset, | ||
| 254 | .count = 8, | ||
| 255 | .has_secondary = false, | ||
| 256 | }; | ||
| 257 | } | ||
| 258 | |||
| 259 | TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) { | ||
| 260 | ConstBufferAddr addr; | ||
| 261 | if (IsBindless(inst)) { | ||
| 262 | const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0))}; | ||
| 263 | if (!track_addr) { | ||
| 264 | throw NotImplementedException("Failed to track bindless texture constant buffer"); | ||
| 265 | } | ||
| 266 | addr = *track_addr; | ||
| 267 | } else { | ||
| 268 | addr = ConstBufferAddr{ | ||
| 269 | .index = env.TextureBoundBuffer(), | ||
| 270 | .offset = inst.Arg(0).U32(), | ||
| 271 | .secondary_index = 0, | ||
| 272 | .secondary_offset = 0, | ||
| 273 | .dynamic_offset = {}, | ||
| 274 | .count = 1, | ||
| 275 | .has_secondary = false, | ||
| 276 | }; | ||
| 277 | } | ||
| 278 | return TextureInst{ | ||
| 279 | .cbuf = addr, | ||
| 280 | .inst = &inst, | ||
| 281 | .block = block, | ||
| 282 | }; | ||
| 283 | } | ||
| 284 | |||
| 285 | TextureType ReadTextureType(Environment& env, const ConstBufferAddr& cbuf) { | ||
| 286 | const u32 secondary_index{cbuf.has_secondary ? cbuf.secondary_index : cbuf.index}; | ||
| 287 | const u32 secondary_offset{cbuf.has_secondary ? cbuf.secondary_offset : cbuf.offset}; | ||
| 288 | const u32 lhs_raw{env.ReadCbufValue(cbuf.index, cbuf.offset)}; | ||
| 289 | const u32 rhs_raw{env.ReadCbufValue(secondary_index, secondary_offset)}; | ||
| 290 | return env.ReadTextureType(lhs_raw | rhs_raw); | ||
| 291 | } | ||
| 292 | |||
| 293 | class Descriptors { | ||
| 294 | public: | ||
| 295 | explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_, | ||
| 296 | ImageBufferDescriptors& image_buffer_descriptors_, | ||
| 297 | TextureDescriptors& texture_descriptors_, | ||
| 298 | ImageDescriptors& image_descriptors_) | ||
| 299 | : texture_buffer_descriptors{texture_buffer_descriptors_}, | ||
| 300 | image_buffer_descriptors{image_buffer_descriptors_}, | ||
| 301 | texture_descriptors{texture_descriptors_}, image_descriptors{image_descriptors_} {} | ||
| 302 | |||
| 303 | u32 Add(const TextureBufferDescriptor& desc) { | ||
| 304 | return Add(texture_buffer_descriptors, desc, [&desc](const auto& existing) { | ||
| 305 | return desc.cbuf_index == existing.cbuf_index && | ||
| 306 | desc.cbuf_offset == existing.cbuf_offset && | ||
| 307 | desc.secondary_cbuf_index == existing.secondary_cbuf_index && | ||
| 308 | desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && | ||
| 309 | desc.count == existing.count && desc.size_shift == existing.size_shift && | ||
| 310 | desc.has_secondary == existing.has_secondary; | ||
| 311 | }); | ||
| 312 | } | ||
| 313 | |||
| 314 | u32 Add(const ImageBufferDescriptor& desc) { | ||
| 315 | const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { | ||
| 316 | return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && | ||
| 317 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | ||
| 318 | desc.size_shift == existing.size_shift; | ||
| 319 | })}; | ||
| 320 | image_buffer_descriptors[index].is_written |= desc.is_written; | ||
| 321 | image_buffer_descriptors[index].is_read |= desc.is_read; | ||
| 322 | return index; | ||
| 323 | } | ||
| 324 | |||
| 325 | u32 Add(const TextureDescriptor& desc) { | ||
| 326 | return Add(texture_descriptors, desc, [&desc](const auto& existing) { | ||
| 327 | return desc.type == existing.type && desc.is_depth == existing.is_depth && | ||
| 328 | desc.has_secondary == existing.has_secondary && | ||
| 329 | desc.cbuf_index == existing.cbuf_index && | ||
| 330 | desc.cbuf_offset == existing.cbuf_offset && | ||
| 331 | desc.secondary_cbuf_index == existing.secondary_cbuf_index && | ||
| 332 | desc.secondary_cbuf_offset == existing.secondary_cbuf_offset && | ||
| 333 | desc.count == existing.count && desc.size_shift == existing.size_shift; | ||
| 334 | }); | ||
| 335 | } | ||
| 336 | |||
| 337 | u32 Add(const ImageDescriptor& desc) { | ||
| 338 | const u32 index{Add(image_descriptors, desc, [&desc](const auto& existing) { | ||
| 339 | return desc.type == existing.type && desc.format == existing.format && | ||
| 340 | desc.cbuf_index == existing.cbuf_index && | ||
| 341 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | ||
| 342 | desc.size_shift == existing.size_shift; | ||
| 343 | })}; | ||
| 344 | image_descriptors[index].is_written |= desc.is_written; | ||
| 345 | image_descriptors[index].is_read |= desc.is_read; | ||
| 346 | return index; | ||
| 347 | } | ||
| 348 | |||
| 349 | private: | ||
| 350 | template <typename Descriptors, typename Descriptor, typename Func> | ||
| 351 | static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||
| 352 | // TODO: Handle arrays | ||
| 353 | const auto it{std::ranges::find_if(descriptors, pred)}; | ||
| 354 | if (it != descriptors.end()) { | ||
| 355 | return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||
| 356 | } | ||
| 357 | descriptors.push_back(desc); | ||
| 358 | return static_cast<u32>(descriptors.size()) - 1; | ||
| 359 | } | ||
| 360 | |||
| 361 | TextureBufferDescriptors& texture_buffer_descriptors; | ||
| 362 | ImageBufferDescriptors& image_buffer_descriptors; | ||
| 363 | TextureDescriptors& texture_descriptors; | ||
| 364 | ImageDescriptors& image_descriptors; | ||
| 365 | }; | ||
| 366 | } // Anonymous namespace | ||
| 367 | |||
| 368 | void TexturePass(Environment& env, IR::Program& program) { | ||
| 369 | TextureInstVector to_replace; | ||
| 370 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 371 | for (IR::Inst& inst : block->Instructions()) { | ||
| 372 | if (!IsTextureInstruction(inst)) { | ||
| 373 | continue; | ||
| 374 | } | ||
| 375 | to_replace.push_back(MakeInst(env, block, inst)); | ||
| 376 | } | ||
| 377 | } | ||
| 378 | // Sort instructions to visit textures by constant buffer index, then by offset | ||
| 379 | std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) { | ||
| 380 | return lhs.cbuf.offset < rhs.cbuf.offset; | ||
| 381 | }); | ||
| 382 | std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) { | ||
| 383 | return lhs.cbuf.index < rhs.cbuf.index; | ||
| 384 | }); | ||
| 385 | Descriptors descriptors{ | ||
| 386 | program.info.texture_buffer_descriptors, | ||
| 387 | program.info.image_buffer_descriptors, | ||
| 388 | program.info.texture_descriptors, | ||
| 389 | program.info.image_descriptors, | ||
| 390 | }; | ||
| 391 | for (TextureInst& texture_inst : to_replace) { | ||
| 392 | // TODO: Handle arrays | ||
| 393 | IR::Inst* const inst{texture_inst.inst}; | ||
| 394 | inst->ReplaceOpcode(IndexedInstruction(*inst)); | ||
| 395 | |||
| 396 | const auto& cbuf{texture_inst.cbuf}; | ||
| 397 | auto flags{inst->Flags<IR::TextureInstInfo>()}; | ||
| 398 | switch (inst->GetOpcode()) { | ||
| 399 | case IR::Opcode::ImageQueryDimensions: | ||
| 400 | flags.type.Assign(ReadTextureType(env, cbuf)); | ||
| 401 | inst->SetFlags(flags); | ||
| 402 | break; | ||
| 403 | case IR::Opcode::ImageFetch: | ||
| 404 | if (flags.type != TextureType::Color1D) { | ||
| 405 | break; | ||
| 406 | } | ||
| 407 | if (ReadTextureType(env, cbuf) == TextureType::Buffer) { | ||
| 408 | // Replace with the bound texture type only when it's a texture buffer | ||
| 409 | // If the instruction is 1D and the bound type is 2D, don't change the code and let | ||
| 410 | // the rasterizer robustness handle it | ||
| 411 | // This happens on Fire Emblem: Three Houses | ||
| 412 | flags.type.Assign(TextureType::Buffer); | ||
| 413 | } | ||
| 414 | break; | ||
| 415 | default: | ||
| 416 | break; | ||
| 417 | } | ||
| 418 | u32 index; | ||
| 419 | switch (inst->GetOpcode()) { | ||
| 420 | case IR::Opcode::ImageRead: | ||
| 421 | case IR::Opcode::ImageAtomicIAdd32: | ||
| 422 | case IR::Opcode::ImageAtomicSMin32: | ||
| 423 | case IR::Opcode::ImageAtomicUMin32: | ||
| 424 | case IR::Opcode::ImageAtomicSMax32: | ||
| 425 | case IR::Opcode::ImageAtomicUMax32: | ||
| 426 | case IR::Opcode::ImageAtomicInc32: | ||
| 427 | case IR::Opcode::ImageAtomicDec32: | ||
| 428 | case IR::Opcode::ImageAtomicAnd32: | ||
| 429 | case IR::Opcode::ImageAtomicOr32: | ||
| 430 | case IR::Opcode::ImageAtomicXor32: | ||
| 431 | case IR::Opcode::ImageAtomicExchange32: | ||
| 432 | case IR::Opcode::ImageWrite: { | ||
| 433 | if (cbuf.has_secondary) { | ||
| 434 | throw NotImplementedException("Unexpected separate sampler"); | ||
| 435 | } | ||
| 436 | const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; | ||
| 437 | const bool is_read{inst->GetOpcode() != IR::Opcode::ImageWrite}; | ||
| 438 | if (flags.type == TextureType::Buffer) { | ||
| 439 | index = descriptors.Add(ImageBufferDescriptor{ | ||
| 440 | .format = flags.image_format, | ||
| 441 | .is_written = is_written, | ||
| 442 | .is_read = is_read, | ||
| 443 | .cbuf_index = cbuf.index, | ||
| 444 | .cbuf_offset = cbuf.offset, | ||
| 445 | .count = cbuf.count, | ||
| 446 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 447 | }); | ||
| 448 | } else { | ||
| 449 | index = descriptors.Add(ImageDescriptor{ | ||
| 450 | .type = flags.type, | ||
| 451 | .format = flags.image_format, | ||
| 452 | .is_written = is_written, | ||
| 453 | .is_read = is_read, | ||
| 454 | .cbuf_index = cbuf.index, | ||
| 455 | .cbuf_offset = cbuf.offset, | ||
| 456 | .count = cbuf.count, | ||
| 457 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 458 | }); | ||
| 459 | } | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | default: | ||
| 463 | if (flags.type == TextureType::Buffer) { | ||
| 464 | index = descriptors.Add(TextureBufferDescriptor{ | ||
| 465 | .has_secondary = cbuf.has_secondary, | ||
| 466 | .cbuf_index = cbuf.index, | ||
| 467 | .cbuf_offset = cbuf.offset, | ||
| 468 | .secondary_cbuf_index = cbuf.secondary_index, | ||
| 469 | .secondary_cbuf_offset = cbuf.secondary_offset, | ||
| 470 | .count = cbuf.count, | ||
| 471 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 472 | }); | ||
| 473 | } else { | ||
| 474 | index = descriptors.Add(TextureDescriptor{ | ||
| 475 | .type = flags.type, | ||
| 476 | .is_depth = flags.is_depth != 0, | ||
| 477 | .has_secondary = cbuf.has_secondary, | ||
| 478 | .cbuf_index = cbuf.index, | ||
| 479 | .cbuf_offset = cbuf.offset, | ||
| 480 | .secondary_cbuf_index = cbuf.secondary_index, | ||
| 481 | .secondary_cbuf_offset = cbuf.secondary_offset, | ||
| 482 | .count = cbuf.count, | ||
| 483 | .size_shift = DESCRIPTOR_SIZE_SHIFT, | ||
| 484 | }); | ||
| 485 | } | ||
| 486 | break; | ||
| 487 | } | ||
| 488 | flags.descriptor_index.Assign(index); | ||
| 489 | inst->SetFlags(flags); | ||
| 490 | |||
| 491 | if (cbuf.count > 1) { | ||
| 492 | const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; | ||
| 493 | IR::IREmitter ir{*texture_inst.block, insert_point}; | ||
| 494 | const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; | ||
| 495 | inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); | ||
| 496 | } else { | ||
| 497 | inst->SetArg(0, IR::Value{}); | ||
| 498 | } | ||
| 499 | } | ||
| 500 | } | ||
| 501 | |||
| 502 | void JoinTextureInfo(Info& base, Info& source) { | ||
| 503 | Descriptors descriptors{ | ||
| 504 | base.texture_buffer_descriptors, | ||
| 505 | base.image_buffer_descriptors, | ||
| 506 | base.texture_descriptors, | ||
| 507 | base.image_descriptors, | ||
| 508 | }; | ||
| 509 | for (auto& desc : source.texture_buffer_descriptors) { | ||
| 510 | descriptors.Add(desc); | ||
| 511 | } | ||
| 512 | for (auto& desc : source.image_buffer_descriptors) { | ||
| 513 | descriptors.Add(desc); | ||
| 514 | } | ||
| 515 | for (auto& desc : source.texture_descriptors) { | ||
| 516 | descriptors.Add(desc); | ||
| 517 | } | ||
| 518 | for (auto& desc : source.image_descriptors) { | ||
| 519 | descriptors.Add(desc); | ||
| 520 | } | ||
| 521 | } | ||
| 522 | |||
| 523 | } // namespace Shader::Optimization | ||
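
Descriptors::Add deduplicates with a linear find_if and appends on a miss, so repeated references to the same constant-buffer slot collapse into one descriptor whose index is then stored back into the instruction flags. A generic sketch of that pattern with stand-in types (not the real descriptor structs):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

template <typename Container, typename Desc, typename Eq>
std::uint32_t AddDeduplicated(Container& descs, const Desc& desc, Eq&& eq) {
    // Linear search: descriptor counts are small, so no hashing is needed
    const auto it = std::ranges::find_if(descs, eq);
    if (it != descs.end()) {
        return static_cast<std::uint32_t>(std::distance(descs.begin(), it));
    }
    descs.push_back(desc);
    return static_cast<std::uint32_t>(descs.size()) - 1;
}

struct TexDesc {
    std::uint32_t cbuf_index, cbuf_offset;
};

int main() {
    std::vector<TexDesc> descs;
    const auto match{[](const TexDesc& a) {
        return [a](const TexDesc& b) {
            return a.cbuf_index == b.cbuf_index && a.cbuf_offset == b.cbuf_offset;
        };
    }};
    const TexDesc first{0, 0x30};
    const TexDesc second{0, 0x34};
    const std::uint32_t i0{AddDeduplicated(descs, first, match(first))};   // 0
    const std::uint32_t i1{AddDeduplicated(descs, first, match(first))};   // still 0
    const std::uint32_t i2{AddDeduplicated(descs, second, match(second))}; // 1
    return (i0 == 0 && i1 == 0 && i2 == 1) ? 0 : 1;
}
```
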
diff --git a/src/shader_recompiler/ir_opt/verification_pass.cpp b/src/shader_recompiler/ir_opt/verification_pass.cpp new file mode 100644 index 000000000..975d5aadf --- /dev/null +++ b/src/shader_recompiler/ir_opt/verification_pass.cpp | |||
| @@ -0,0 +1,98 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | #include <set> | ||
| 7 | |||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 12 | |||
| 13 | namespace Shader::Optimization { | ||
| 14 | |||
| 15 | static void ValidateTypes(const IR::Program& program) { | ||
| 16 | for (const auto& block : program.blocks) { | ||
| 17 | for (const IR::Inst& inst : *block) { | ||
| 18 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 19 | // Skip validation on phi nodes | ||
| 20 | continue; | ||
| 21 | } | ||
| 22 | const size_t num_args{inst.NumArgs()}; | ||
| 23 | for (size_t i = 0; i < num_args; ++i) { | ||
| 24 | const IR::Type t1{inst.Arg(i).Type()}; | ||
| 25 | const IR::Type t2{IR::ArgTypeOf(inst.GetOpcode(), i)}; | ||
| 26 | if (!IR::AreTypesCompatible(t1, t2)) { | ||
| 27 | throw LogicError("Invalid types in block:\n{}", IR::DumpBlock(*block)); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | static void ValidateUses(const IR::Program& program) { | ||
| 35 | std::map<IR::Inst*, int> actual_uses; | ||
| 36 | for (const auto& block : program.blocks) { | ||
| 37 | for (const IR::Inst& inst : *block) { | ||
| 38 | const size_t num_args{inst.NumArgs()}; | ||
| 39 | for (size_t i = 0; i < num_args; ++i) { | ||
| 40 | const IR::Value arg{inst.Arg(i)}; | ||
| 41 | if (!arg.IsImmediate()) { | ||
| 42 | ++actual_uses[arg.Inst()]; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } | ||
| 47 | for (const auto [inst, uses] : actual_uses) { | ||
| 48 | if (inst->UseCount() != uses) { | ||
| 49 | throw LogicError("Invalid uses in block: {}", IR::DumpProgram(program)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | } | ||
| 53 | |||
| 54 | static void ValidateForwardDeclarations(const IR::Program& program) { | ||
| 55 | std::set<const IR::Inst*> definitions; | ||
| 56 | for (const IR::Block* const block : program.blocks) { | ||
| 57 | for (const IR::Inst& inst : *block) { | ||
| 58 | definitions.emplace(&inst); | ||
| 59 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 60 | // Phi nodes can have forward declarations | ||
| 61 | continue; | ||
| 62 | } | ||
| 63 | const size_t num_args{inst.NumArgs()}; | ||
| 64 | for (size_t arg = 0; arg < num_args; ++arg) { | ||
| 65 | if (inst.Arg(arg).IsImmediate()) { | ||
| 66 | continue; | ||
| 67 | } | ||
| 68 | if (!definitions.contains(inst.Arg(arg).Inst())) { | ||
| 69 | throw LogicError("Forward declaration in block: {}", IR::DumpBlock(*block)); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | static void ValidatePhiNodes(const IR::Program& program) { | ||
| 77 | for (const IR::Block* const block : program.blocks) { | ||
| 78 | bool no_more_phis{false}; | ||
| 79 | for (const IR::Inst& inst : *block) { | ||
| 80 | if (inst.GetOpcode() == IR::Opcode::Phi) { | ||
| 81 | if (no_more_phis) { | ||
| 82 | throw LogicError("Interleaved phi nodes: {}", IR::DumpBlock(*block)); | ||
| 83 | } | ||
| 84 | } else { | ||
| 85 | no_more_phis = true; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | void VerificationPass(const IR::Program& program) { | ||
| 92 | ValidateTypes(program); | ||
| 93 | ValidateUses(program); | ||
| 94 | ValidateForwardDeclarations(program); | ||
| 95 | ValidatePhiNodes(program); | ||
| 96 | } | ||
| 97 | |||
| 98 | } // namespace Shader::Optimization | ||
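
ValidateUses recounts every operand reference from scratch and compares the totals against each instruction's cached use counter, which catches passes that replace or remove values without maintaining the counts. The same idea modeled on plain data, with a stand-in Node type:

```cpp
#include <cassert>
#include <map>
#include <vector>

struct Node {
    int cached_uses{};
    std::vector<Node*> args;
};

bool UsesConsistent(const std::vector<Node*>& nodes) {
    std::map<const Node*, int> actual;
    for (const Node* node : nodes) {
        for (const Node* arg : node->args) {
            ++actual[arg]; // recount every operand reference from scratch
        }
    }
    for (const Node* node : nodes) {
        if (node->cached_uses != actual[node]) {
            return false; // a pass updated operands without fixing the counter
        }
    }
    return true;
}

int main() {
    Node a{.cached_uses = 2};
    Node b{.cached_uses = 0, .args = {&a}};
    Node c{.cached_uses = 0, .args = {&a}};
    assert(UsesConsistent({&a, &b, &c}));
    a.cached_uses = 1; // simulate a stale use counter
    assert(!UsesConsistent({&a, &b, &c}));
}
```
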
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h new file mode 100644 index 000000000..f8b255b66 --- /dev/null +++ b/src/shader_recompiler/object_pool.h | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 10 | |||
| 11 | namespace Shader { | ||
| 12 | |||
| 13 | template <typename T> | ||
| 14 | requires std::is_destructible_v<T> class ObjectPool { | ||
| 15 | public: | ||
| 16 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { | ||
| 17 | node = &chunks.emplace_back(new_chunk_size); | ||
| 18 | } | ||
| 19 | |||
| 20 | template <typename... Args> | ||
| 21 | requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { | ||
| 22 | return std::construct_at(Memory(), std::forward<Args>(args)...); | ||
| 23 | } | ||
| 24 | |||
| 25 | void ReleaseContents() { | ||
| 26 | if (chunks.empty()) { | ||
| 27 | return; | ||
| 28 | } | ||
| 29 | Chunk& root{chunks.front()}; | ||
| 30 | if (root.used_objects == root.num_objects) { | ||
| 31 | // Root chunk has been filled; squash allocations into it | ||
| 32 | const size_t total_objects{root.num_objects + new_chunk_size * (chunks.size() - 1)}; | ||
| 33 | chunks.clear(); | ||
| 34 | chunks.emplace_back(total_objects); | ||
| 35 | } else { | ||
| 36 | root.Release(); | ||
| 37 | chunks.resize(1); | ||
| 38 | } | ||
| 39 | chunks.shrink_to_fit(); | ||
| 40 | node = &chunks.front(); | ||
| 41 | } | ||
| 42 | |||
| 43 | private: | ||
| 44 | struct NonTrivialDummy { | ||
| 45 | NonTrivialDummy() noexcept {} | ||
| 46 | }; | ||
| 47 | |||
| 48 | union Storage { | ||
| 49 | Storage() noexcept {} | ||
| 50 | ~Storage() noexcept {} | ||
| 51 | |||
| 52 | NonTrivialDummy dummy{}; | ||
| 53 | T object; | ||
| 54 | }; | ||
| 55 | |||
| 56 | struct Chunk { | ||
| 57 | explicit Chunk() = default; | ||
| 58 | explicit Chunk(size_t size) | ||
| 59 | : num_objects{size}, storage{std::make_unique<Storage[]>(size)} {} | ||
| 60 | |||
| 61 | Chunk& operator=(Chunk&& rhs) noexcept { | ||
| 62 | Release(); | ||
| 63 | used_objects = std::exchange(rhs.used_objects, 0); | ||
| 64 | num_objects = std::exchange(rhs.num_objects, 0); | ||
| 65 | storage = std::move(rhs.storage); | ||
| 66 | return *this; | ||
| 67 | } | ||
| 68 | Chunk(Chunk&& rhs) noexcept | ||
| 69 | : used_objects{std::exchange(rhs.used_objects, 0)}, | ||
| 70 | num_objects{std::exchange(rhs.num_objects, 0)}, storage{std::move(rhs.storage)} {} | ||
| 71 | |||
| 72 | ~Chunk() { | ||
| 73 | Release(); | ||
| 74 | } | ||
| 75 | |||
| 76 | void Release() { | ||
| 77 | std::destroy_n(storage.get(), used_objects); | ||
| 78 | used_objects = 0; | ||
| 79 | } | ||
| 80 | |||
| 81 | size_t used_objects{}; | ||
| 82 | size_t num_objects{}; | ||
| 83 | std::unique_ptr<Storage[]> storage; | ||
| 84 | }; | ||
| 85 | |||
| 86 | [[nodiscard]] T* Memory() { | ||
| 87 | Chunk* const chunk{FreeChunk()}; | ||
| 88 | return &chunk->storage[chunk->used_objects++].object; | ||
| 89 | } | ||
| 90 | |||
| 91 | [[nodiscard]] Chunk* FreeChunk() { | ||
| 92 | if (node->used_objects != node->num_objects) { | ||
| 93 | return node; | ||
| 94 | } | ||
| 95 | node = &chunks.emplace_back(new_chunk_size); | ||
| 96 | return node; | ||
| 97 | } | ||
| 98 | |||
| 99 | Chunk* node{}; | ||
| 100 | std::vector<Chunk> chunks; | ||
| 101 | size_t new_chunk_size{}; | ||
| 102 | }; | ||
| 103 | |||
| 104 | } // namespace Shader | ||
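ObjectPool never frees individual objects: Create hands out pointers that stay valid until ReleaseContents, which destroys everything and, once the root chunk has filled up, squashes all chunks into a single allocation sized for the observed peak. A usage sketch, with Node as a hypothetical payload type:

    struct Node {
        explicit Node(int v) : value{v} {}
        int value;
    };

    Shader::ObjectPool<Node> pool{64};  // 64 objects per chunk
    Node* const a{pool.Create(1)};
    Node* const b{pool.Create(2)};
    // a and b remain valid here; there is no per-object deallocation.
    pool.ReleaseContents();             // runs ~Node() on both, keeps one chunk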
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h new file mode 100644 index 000000000..f0c3b3b17 --- /dev/null +++ b/src/shader_recompiler/profile.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader { | ||
| 10 | |||
| 11 | struct Profile { | ||
| 12 | u32 supported_spirv{0x00010000}; | ||
| 13 | |||
| 14 | bool unified_descriptor_binding{}; | ||
| 15 | bool support_descriptor_aliasing{}; | ||
| 16 | bool support_int8{}; | ||
| 17 | bool support_int16{}; | ||
| 18 | bool support_int64{}; | ||
| 19 | bool support_vertex_instance_id{}; | ||
| 20 | bool support_float_controls{}; | ||
| 21 | bool support_separate_denorm_behavior{}; | ||
| 22 | bool support_separate_rounding_mode{}; | ||
| 23 | bool support_fp16_denorm_preserve{}; | ||
| 24 | bool support_fp32_denorm_preserve{}; | ||
| 25 | bool support_fp16_denorm_flush{}; | ||
| 26 | bool support_fp32_denorm_flush{}; | ||
| 27 | bool support_fp16_signed_zero_nan_preserve{}; | ||
| 28 | bool support_fp32_signed_zero_nan_preserve{}; | ||
| 29 | bool support_fp64_signed_zero_nan_preserve{}; | ||
| 30 | bool support_explicit_workgroup_layout{}; | ||
| 31 | bool support_vote{}; | ||
| 32 | bool support_viewport_index_layer_non_geometry{}; | ||
| 33 | bool support_viewport_mask{}; | ||
| 34 | bool support_typeless_image_loads{}; | ||
| 35 | bool support_demote_to_helper_invocation{}; | ||
| 36 | bool support_int64_atomics{}; | ||
| 37 | bool support_derivative_control{}; | ||
| 38 | bool support_geometry_shader_passthrough{}; | ||
| 39 | bool support_gl_nv_gpu_shader_5{}; | ||
| 40 | bool support_gl_amd_gpu_shader_half_float{}; | ||
| 41 | bool support_gl_texture_shadow_lod{}; | ||
| 42 | bool support_gl_warp_intrinsics{}; | ||
| 43 | bool support_gl_variable_aoffi{}; | ||
| 44 | bool support_gl_sparse_textures{}; | ||
| 45 | bool support_gl_derivative_control{}; | ||
| 46 | |||
| 47 | bool warp_size_potentially_larger_than_guest{}; | ||
| 48 | |||
| 49 | bool lower_left_origin_mode{}; | ||
| 50 | /// Fragment outputs have to be declared even if they are not written, to avoid undefined values. | ||
| 51 | /// See Ori and the Blind Forest's main menu for reference. | ||
| 52 | bool need_declared_frag_colors{}; | ||
| 53 | /// Prevents fast math optimizations that may cause inaccuracies | ||
| 54 | bool need_fastmath_off{}; | ||
| 55 | |||
| 56 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | ||
| 57 | bool has_broken_spirv_clamp{}; | ||
| 58 | /// Offset image operands with an unsigned type do not work | ||
| 59 | bool has_broken_unsigned_image_offsets{}; | ||
| 60 | /// Signed instructions with unsigned data types are misinterpreted | ||
| 61 | bool has_broken_signed_operations{}; | ||
| 62 | /// Float controls break when fp16 is enabled | ||
| 63 | bool has_broken_fp16_float_controls{}; | ||
| 64 | /// Dynamic vec4 indexing is broken on some OpenGL drivers | ||
| 65 | bool has_gl_component_indexing_bug{}; | ||
| 66 | /// The precise type qualifier is broken in the fragment stage of some drivers | ||
| 67 | bool has_gl_precise_bug{}; | ||
| 68 | /// Ignores the SPIR-V ordered vs. unordered distinction and compares using GLSL semantics | ||
| 69 | bool ignore_nan_fp_comparisons{}; | ||
| 70 | |||
| 71 | u32 gl_max_compute_smem_size{}; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Shader | ||
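Profile is a plain aggregate that a backend fills once from its queried device capabilities; the emitters then branch on these flags instead of probing the driver. A hedged sketch, where device_supports_int64 and is_known_broken_driver stand in for hypothetical query results:

    const Shader::Profile profile{
        .supported_spirv = 0x00010300,  // SPIR-V 1.3, an assumed target version
        .support_int64 = device_supports_int64,            // hypothetical query
        .has_broken_spirv_clamp = is_known_broken_driver,  // hypothetical workaround
    };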
diff --git a/src/shader_recompiler/program_header.h b/src/shader_recompiler/program_header.h new file mode 100644 index 000000000..bd6c2bfb5 --- /dev/null +++ b/src/shader_recompiler/program_header.h | |||
| @@ -0,0 +1,219 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | |||
| 14 | namespace Shader { | ||
| 15 | |||
| 16 | enum class OutputTopology : u32 { | ||
| 17 | PointList = 1, | ||
| 18 | LineStrip = 6, | ||
| 19 | TriangleStrip = 7, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class PixelImap : u8 { | ||
| 23 | Unused = 0, | ||
| 24 | Constant = 1, | ||
| 25 | Perspective = 2, | ||
| 26 | ScreenLinear = 3, | ||
| 27 | }; | ||
| 28 | |||
| 29 | // Documentation in: | ||
| 30 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html | ||
| 31 | struct ProgramHeader { | ||
| 32 | union { | ||
| 33 | BitField<0, 5, u32> sph_type; | ||
| 34 | BitField<5, 5, u32> version; | ||
| 35 | BitField<10, 4, u32> shader_type; | ||
| 36 | BitField<14, 1, u32> mrt_enable; | ||
| 37 | BitField<15, 1, u32> kills_pixels; | ||
| 38 | BitField<16, 1, u32> does_global_store; | ||
| 39 | BitField<17, 4, u32> sass_version; | ||
| 40 | BitField<21, 2, u32> reserved1; | ||
| 41 | BitField<24, 1, u32> geometry_passthrough; | ||
| 42 | BitField<25, 1, u32> reserved2; | ||
| 43 | BitField<26, 1, u32> does_load_or_store; | ||
| 44 | BitField<27, 1, u32> does_fp64; | ||
| 45 | BitField<28, 4, u32> stream_out_mask; | ||
| 46 | } common0; | ||
| 47 | |||
| 48 | union { | ||
| 49 | BitField<0, 24, u32> shader_local_memory_low_size; | ||
| 50 | BitField<24, 8, u32> per_patch_attribute_count; | ||
| 51 | } common1; | ||
| 52 | |||
| 53 | union { | ||
| 54 | BitField<0, 24, u32> shader_local_memory_high_size; | ||
| 55 | BitField<24, 8, u32> threads_per_input_primitive; | ||
| 56 | } common2; | ||
| 57 | |||
| 58 | union { | ||
| 59 | BitField<0, 24, u32> shader_local_memory_crs_size; | ||
| 60 | BitField<24, 4, OutputTopology> output_topology; | ||
| 61 | BitField<28, 4, u32> reserved; | ||
| 62 | } common3; | ||
| 63 | |||
| 64 | union { | ||
| 65 | BitField<0, 12, u32> max_output_vertices; | ||
| 66 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | ||
| 67 | BitField<20, 4, u32> reserved; | ||
| 68 | BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | ||
| 69 | } common4; | ||
| 70 | |||
| 71 | union { | ||
| 72 | struct { | ||
| 73 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 74 | |||
| 75 | union { | ||
| 76 | BitField<0, 1, u8> primitive_array_id; | ||
| 77 | BitField<1, 1, u8> rt_array_index; | ||
| 78 | BitField<2, 1, u8> viewport_index; | ||
| 79 | BitField<3, 1, u8> point_size; | ||
| 80 | BitField<4, 1, u8> position_x; | ||
| 81 | BitField<5, 1, u8> position_y; | ||
| 82 | BitField<6, 1, u8> position_z; | ||
| 83 | BitField<7, 1, u8> position_w; | ||
| 84 | u8 raw; | ||
| 85 | } imap_systemb; | ||
| 86 | |||
| 87 | std::array<u8, 16> imap_generic_vector; | ||
| 88 | |||
| 89 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 90 | union { | ||
| 91 | BitField<0, 8, u16> clip_distances; | ||
| 92 | BitField<8, 1, u16> point_sprite_s; | ||
| 93 | BitField<9, 1, u16> point_sprite_t; | ||
| 94 | BitField<10, 1, u16> fog_coordinate; | ||
| 95 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 96 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 97 | BitField<14, 1, u16> instance_id; | ||
| 98 | BitField<15, 1, u16> vertex_id; | ||
| 99 | }; | ||
| 100 | INSERT_PADDING_BYTES_NOINIT(5); // ImapFixedFncTexture[10] | ||
| 101 | INSERT_PADDING_BYTES_NOINIT(1); // ImapReserved | ||
| 102 | INSERT_PADDING_BYTES_NOINIT(3); // OmapSystemValuesA | ||
| 103 | |||
| 104 | union { | ||
| 105 | BitField<0, 1, u8> primitive_array_id; | ||
| 106 | BitField<1, 1, u8> rt_array_index; | ||
| 107 | BitField<2, 1, u8> viewport_index; | ||
| 108 | BitField<3, 1, u8> point_size; | ||
| 109 | BitField<4, 1, u8> position_x; | ||
| 110 | BitField<5, 1, u8> position_y; | ||
| 111 | BitField<6, 1, u8> position_z; | ||
| 112 | BitField<7, 1, u8> position_w; | ||
| 113 | u8 raw; | ||
| 114 | } omap_systemb; | ||
| 115 | |||
| 116 | std::array<u8, 16> omap_generic_vector; | ||
| 117 | |||
| 118 | INSERT_PADDING_BYTES_NOINIT(2); // OmapColor | ||
| 119 | |||
| 120 | union { | ||
| 121 | BitField<0, 8, u16> clip_distances; | ||
| 122 | BitField<8, 1, u16> point_sprite_s; | ||
| 123 | BitField<9, 1, u16> point_sprite_t; | ||
| 124 | BitField<10, 1, u16> fog_coordinate; | ||
| 125 | BitField<12, 1, u16> tessellation_eval_point_u; | ||
| 126 | BitField<13, 1, u16> tessellation_eval_point_v; | ||
| 127 | BitField<14, 1, u16> instance_id; | ||
| 128 | BitField<15, 1, u16> vertex_id; | ||
| 129 | } omap_systemc; | ||
| 130 | |||
| 131 | INSERT_PADDING_BYTES_NOINIT(5); // OmapFixedFncTexture[10] | ||
| 132 | INSERT_PADDING_BYTES_NOINIT(1); // OmapReserved | ||
| 133 | |||
| 134 | [[nodiscard]] std::array<bool, 4> InputGeneric(size_t index) const noexcept { | ||
| 135 | const int data{imap_generic_vector[index >> 1] >> ((index % 2) * 4)}; | ||
| 136 | return { | ||
| 137 | (data & 1) != 0, | ||
| 138 | (data & 2) != 0, | ||
| 139 | (data & 4) != 0, | ||
| 140 | (data & 8) != 0, | ||
| 141 | }; | ||
| 142 | } | ||
| 143 | |||
| 144 | [[nodiscard]] std::array<bool, 4> OutputGeneric(size_t index) const noexcept { | ||
| 145 | const int data{omap_generic_vector[index >> 1] >> ((index % 2) * 4)}; | ||
| 146 | return { | ||
| 147 | (data & 1) != 0, | ||
| 148 | (data & 2) != 0, | ||
| 149 | (data & 4) != 0, | ||
| 150 | (data & 8) != 0, | ||
| 151 | }; | ||
| 152 | } | ||
| 153 | } vtg; | ||
| 154 | |||
| 155 | struct { | ||
| 156 | INSERT_PADDING_BYTES_NOINIT(3); // ImapSystemValuesA | ||
| 157 | |||
| 158 | union { | ||
| 159 | BitField<0, 1, u8> primitive_array_id; | ||
| 160 | BitField<1, 1, u8> rt_array_index; | ||
| 161 | BitField<2, 1, u8> viewport_index; | ||
| 162 | BitField<3, 1, u8> point_size; | ||
| 163 | BitField<4, 1, u8> position_x; | ||
| 164 | BitField<5, 1, u8> position_y; | ||
| 165 | BitField<6, 1, u8> position_z; | ||
| 166 | BitField<7, 1, u8> position_w; | ||
| 167 | BitField<0, 4, u8> first; | ||
| 168 | BitField<4, 4, u8> position; | ||
| 169 | u8 raw; | ||
| 170 | } imap_systemb; | ||
| 171 | |||
| 172 | union { | ||
| 173 | BitField<0, 2, PixelImap> x; | ||
| 174 | BitField<2, 2, PixelImap> y; | ||
| 175 | BitField<4, 2, PixelImap> z; | ||
| 176 | BitField<6, 2, PixelImap> w; | ||
| 177 | u8 raw; | ||
| 178 | } imap_generic_vector[32]; | ||
| 179 | |||
| 180 | INSERT_PADDING_BYTES_NOINIT(2); // ImapColor | ||
| 181 | INSERT_PADDING_BYTES_NOINIT(2); // ImapSystemValuesC | ||
| 182 | INSERT_PADDING_BYTES_NOINIT(10); // ImapFixedFncTexture[10] | ||
| 183 | INSERT_PADDING_BYTES_NOINIT(2); // ImapReserved | ||
| 184 | |||
| 185 | struct { | ||
| 186 | u32 target; | ||
| 187 | union { | ||
| 188 | BitField<0, 1, u32> sample_mask; | ||
| 189 | BitField<1, 1, u32> depth; | ||
| 190 | BitField<2, 30, u32> reserved; | ||
| 191 | }; | ||
| 192 | } omap; | ||
| 193 | |||
| 194 | [[nodiscard]] std::array<bool, 4> EnabledOutputComponents(u32 rt) const noexcept { | ||
| 195 | const u32 bits{omap.target >> (rt * 4)}; | ||
| 196 | return {(bits & 1) != 0, (bits & 2) != 0, (bits & 4) != 0, (bits & 8) != 0}; | ||
| 197 | } | ||
| 198 | |||
| 199 | [[nodiscard]] std::array<PixelImap, 4> GenericInputMap(u32 attribute) const { | ||
| 200 | const auto& vector{imap_generic_vector[attribute]}; | ||
| 201 | return {vector.x, vector.y, vector.z, vector.w}; | ||
| 202 | } | ||
| 203 | |||
| 204 | [[nodiscard]] bool IsGenericVectorActive(size_t index) const { | ||
| 205 | return imap_generic_vector[index].raw != 0; | ||
| 206 | } | ||
| 207 | } ps; | ||
| 208 | |||
| 209 | std::array<u32, 0xf> raw; | ||
| 210 | }; | ||
| 211 | |||
| 212 | [[nodiscard]] u64 LocalMemorySize() const noexcept { | ||
| 213 | return static_cast<u64>(common1.shader_local_memory_low_size) | | ||
| 214 | (static_cast<u64>(common2.shader_local_memory_high_size) << 24); | ||
| 215 | } | ||
| 216 | }; | ||
| 217 | static_assert(sizeof(ProgramHeader) == 0x50, "Incorrect structure size"); | ||
| 218 | |||
| 219 | } // namespace Shader | ||
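Since ProgramHeader is bit-compatible with the hardware SPH, consumers typically copy the raw 0x50 bytes out of guest memory and read fields through the BitField views. A sketch, assuming sph is an already-populated fragment-stage header:

    // sph is assumed to have been memcpy'd from the guest shader program.
    const u64 local_memory_size{sph.LocalMemorySize()};
    // Which components of render target 0 the fragment shader writes:
    const std::array<bool, 4> rt0_mask{sph.ps.EnabledOutputComponents(0)};
    // Interpolation qualifiers for generic input attribute 3:
    const std::array<Shader::PixelImap, 4> modes{sph.ps.GenericInputMap(3)};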
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h new file mode 100644 index 000000000..f3f83a258 --- /dev/null +++ b/src/shader_recompiler/runtime_info.h | |||
| @@ -0,0 +1,88 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <optional> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/varying_state.h" | ||
| 14 | |||
| 15 | namespace Shader { | ||
| 16 | |||
| 17 | enum class AttributeType : u8 { | ||
| 18 | Float, | ||
| 19 | SignedInt, | ||
| 20 | UnsignedInt, | ||
| 21 | Disabled, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class InputTopology { | ||
| 25 | Points, | ||
| 26 | Lines, | ||
| 27 | LinesAdjacency, | ||
| 28 | Triangles, | ||
| 29 | TrianglesAdjacency, | ||
| 30 | }; | ||
| 31 | |||
| 32 | enum class CompareFunction { | ||
| 33 | Never, | ||
| 34 | Less, | ||
| 35 | Equal, | ||
| 36 | LessThanEqual, | ||
| 37 | Greater, | ||
| 38 | NotEqual, | ||
| 39 | GreaterThanEqual, | ||
| 40 | Always, | ||
| 41 | }; | ||
| 42 | |||
| 43 | enum class TessPrimitive { | ||
| 44 | Isolines, | ||
| 45 | Triangles, | ||
| 46 | Quads, | ||
| 47 | }; | ||
| 48 | |||
| 49 | enum class TessSpacing { | ||
| 50 | Equal, | ||
| 51 | FractionalOdd, | ||
| 52 | FractionalEven, | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct TransformFeedbackVarying { | ||
| 56 | u32 buffer{}; | ||
| 57 | u32 stride{}; | ||
| 58 | u32 offset{}; | ||
| 59 | u32 components{}; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct RuntimeInfo { | ||
| 63 | std::array<AttributeType, 32> generic_input_types{}; | ||
| 64 | VaryingState previous_stage_stores; | ||
| 65 | |||
| 66 | bool convert_depth_mode{}; | ||
| 67 | bool force_early_z{}; | ||
| 68 | |||
| 69 | TessPrimitive tess_primitive{}; | ||
| 70 | TessSpacing tess_spacing{}; | ||
| 71 | bool tess_clockwise{}; | ||
| 72 | |||
| 73 | InputTopology input_topology{}; | ||
| 74 | |||
| 75 | std::optional<float> fixed_state_point_size; | ||
| 76 | std::optional<CompareFunction> alpha_test_func; | ||
| 77 | float alpha_test_reference{}; | ||
| 78 | |||
| 79 | /// Static Y negate value | ||
| 80 | bool y_negate{}; | ||
| 81 | /// Use storage buffers instead of global pointers on GLASM | ||
| 82 | bool glasm_use_storage_buffers{}; | ||
| 83 | |||
| 84 | /// Transform feedback state for each varying | ||
| 85 | std::vector<TransformFeedbackVarying> xfb_varyings; | ||
| 86 | }; | ||
| 87 | |||
| 88 | } // namespace Shader | ||
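RuntimeInfo carries the fixed-function state the IR alone cannot express, so the same shader can be recompiled when that state changes. A hedged construction sketch for a vertex stage; the attribute formats and test values are illustrative:

    Shader::RuntimeInfo info{};
    info.generic_input_types[0] = Shader::AttributeType::Float;        // assumed format
    info.generic_input_types[1] = Shader::AttributeType::UnsignedInt;  // assumed format
    info.fixed_state_point_size = 1.0f;  // only set when the pipeline fixes point size
    info.alpha_test_func = Shader::CompareFunction::Greater;
    info.alpha_test_reference = 0.5f;    // illustrative reference value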
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h new file mode 100644 index 000000000..4ef4dbd40 --- /dev/null +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -0,0 +1,193 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/type.h" | ||
| 12 | #include "shader_recompiler/varying_state.h" | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/container/static_vector.hpp> | ||
| 16 | |||
| 17 | namespace Shader { | ||
| 18 | |||
| 19 | enum class TextureType : u32 { | ||
| 20 | Color1D, | ||
| 21 | ColorArray1D, | ||
| 22 | Color2D, | ||
| 23 | ColorArray2D, | ||
| 24 | Color3D, | ||
| 25 | ColorCube, | ||
| 26 | ColorArrayCube, | ||
| 27 | Buffer, | ||
| 28 | }; | ||
| 29 | constexpr u32 NUM_TEXTURE_TYPES = 8; | ||
| 30 | |||
| 31 | enum class ImageFormat : u32 { | ||
| 32 | Typeless, | ||
| 33 | R8_UINT, | ||
| 34 | R8_SINT, | ||
| 35 | R16_UINT, | ||
| 36 | R16_SINT, | ||
| 37 | R32_UINT, | ||
| 38 | R32G32_UINT, | ||
| 39 | R32G32B32A32_UINT, | ||
| 40 | }; | ||
| 41 | |||
| 42 | enum class Interpolation { | ||
| 43 | Smooth, | ||
| 44 | Flat, | ||
| 45 | NoPerspective, | ||
| 46 | }; | ||
| 47 | |||
| 48 | struct ConstantBufferDescriptor { | ||
| 49 | u32 index; | ||
| 50 | u32 count; | ||
| 51 | }; | ||
| 52 | |||
| 53 | struct StorageBufferDescriptor { | ||
| 54 | u32 cbuf_index; | ||
| 55 | u32 cbuf_offset; | ||
| 56 | u32 count; | ||
| 57 | bool is_written; | ||
| 58 | }; | ||
| 59 | |||
| 60 | struct TextureBufferDescriptor { | ||
| 61 | bool has_secondary; | ||
| 62 | u32 cbuf_index; | ||
| 63 | u32 cbuf_offset; | ||
| 64 | u32 secondary_cbuf_index; | ||
| 65 | u32 secondary_cbuf_offset; | ||
| 66 | u32 count; | ||
| 67 | u32 size_shift; | ||
| 68 | }; | ||
| 69 | using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDescriptor, 6>; | ||
| 70 | |||
| 71 | struct ImageBufferDescriptor { | ||
| 72 | ImageFormat format; | ||
| 73 | bool is_written; | ||
| 74 | bool is_read; | ||
| 75 | u32 cbuf_index; | ||
| 76 | u32 cbuf_offset; | ||
| 77 | u32 count; | ||
| 78 | u32 size_shift; | ||
| 79 | }; | ||
| 80 | using ImageBufferDescriptors = boost::container::small_vector<ImageBufferDescriptor, 2>; | ||
| 81 | |||
| 82 | struct TextureDescriptor { | ||
| 83 | TextureType type; | ||
| 84 | bool is_depth; | ||
| 85 | bool has_secondary; | ||
| 86 | u32 cbuf_index; | ||
| 87 | u32 cbuf_offset; | ||
| 88 | u32 secondary_cbuf_index; | ||
| 89 | u32 secondary_cbuf_offset; | ||
| 90 | u32 count; | ||
| 91 | u32 size_shift; | ||
| 92 | }; | ||
| 93 | using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>; | ||
| 94 | |||
| 95 | struct ImageDescriptor { | ||
| 96 | TextureType type; | ||
| 97 | ImageFormat format; | ||
| 98 | bool is_written; | ||
| 99 | bool is_read; | ||
| 100 | u32 cbuf_index; | ||
| 101 | u32 cbuf_offset; | ||
| 102 | u32 count; | ||
| 103 | u32 size_shift; | ||
| 104 | }; | ||
| 105 | using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; | ||
| 106 | |||
| 107 | struct Info { | ||
| 108 | static constexpr size_t MAX_CBUFS{18}; | ||
| 109 | static constexpr size_t MAX_SSBOS{32}; | ||
| 110 | |||
| 111 | bool uses_workgroup_id{}; | ||
| 112 | bool uses_local_invocation_id{}; | ||
| 113 | bool uses_invocation_id{}; | ||
| 114 | bool uses_sample_id{}; | ||
| 115 | bool uses_is_helper_invocation{}; | ||
| 116 | bool uses_subgroup_invocation_id{}; | ||
| 117 | bool uses_subgroup_shuffles{}; | ||
| 118 | std::array<bool, 30> uses_patches{}; | ||
| 119 | |||
| 120 | std::array<Interpolation, 32> interpolation{}; | ||
| 121 | VaryingState loads; | ||
| 122 | VaryingState stores; | ||
| 123 | VaryingState passthrough; | ||
| 124 | |||
| 125 | bool loads_indexed_attributes{}; | ||
| 126 | |||
| 127 | std::array<bool, 8> stores_frag_color{}; | ||
| 128 | bool stores_sample_mask{}; | ||
| 129 | bool stores_frag_depth{}; | ||
| 130 | |||
| 131 | bool stores_tess_level_outer{}; | ||
| 132 | bool stores_tess_level_inner{}; | ||
| 133 | |||
| 134 | bool stores_indexed_attributes{}; | ||
| 135 | |||
| 136 | bool stores_global_memory{}; | ||
| 137 | |||
| 138 | bool uses_fp16{}; | ||
| 139 | bool uses_fp64{}; | ||
| 140 | bool uses_fp16_denorms_flush{}; | ||
| 141 | bool uses_fp16_denorms_preserve{}; | ||
| 142 | bool uses_fp32_denorms_flush{}; | ||
| 143 | bool uses_fp32_denorms_preserve{}; | ||
| 144 | bool uses_int8{}; | ||
| 145 | bool uses_int16{}; | ||
| 146 | bool uses_int64{}; | ||
| 147 | bool uses_image_1d{}; | ||
| 148 | bool uses_sampled_1d{}; | ||
| 149 | bool uses_sparse_residency{}; | ||
| 150 | bool uses_demote_to_helper_invocation{}; | ||
| 151 | bool uses_subgroup_vote{}; | ||
| 152 | bool uses_subgroup_mask{}; | ||
| 153 | bool uses_fswzadd{}; | ||
| 154 | bool uses_derivatives{}; | ||
| 155 | bool uses_typeless_image_reads{}; | ||
| 156 | bool uses_typeless_image_writes{}; | ||
| 157 | bool uses_image_buffers{}; | ||
| 158 | bool uses_shared_increment{}; | ||
| 159 | bool uses_shared_decrement{}; | ||
| 160 | bool uses_global_increment{}; | ||
| 161 | bool uses_global_decrement{}; | ||
| 162 | bool uses_atomic_f32_add{}; | ||
| 163 | bool uses_atomic_f16x2_add{}; | ||
| 164 | bool uses_atomic_f16x2_min{}; | ||
| 165 | bool uses_atomic_f16x2_max{}; | ||
| 166 | bool uses_atomic_f32x2_add{}; | ||
| 167 | bool uses_atomic_f32x2_min{}; | ||
| 168 | bool uses_atomic_f32x2_max{}; | ||
| 169 | bool uses_atomic_s32_min{}; | ||
| 170 | bool uses_atomic_s32_max{}; | ||
| 171 | bool uses_int64_bit_atomics{}; | ||
| 172 | bool uses_global_memory{}; | ||
| 173 | bool uses_atomic_image_u32{}; | ||
| 174 | bool uses_shadow_lod{}; | ||
| 175 | |||
| 176 | IR::Type used_constant_buffer_types{}; | ||
| 177 | IR::Type used_storage_buffer_types{}; | ||
| 178 | |||
| 179 | u32 constant_buffer_mask{}; | ||
| 180 | std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{}; | ||
| 181 | u32 nvn_buffer_base{}; | ||
| 182 | std::bitset<16> nvn_buffer_used{}; | ||
| 183 | |||
| 184 | boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | ||
| 185 | constant_buffer_descriptors; | ||
| 186 | boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | ||
| 187 | TextureBufferDescriptors texture_buffer_descriptors; | ||
| 188 | ImageBufferDescriptors image_buffer_descriptors; | ||
| 189 | TextureDescriptors texture_descriptors; | ||
| 190 | ImageDescriptors image_descriptors; | ||
| 191 | }; | ||
| 192 | |||
| 193 | } // namespace Shader | ||
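Info is the contract between the frontend analysis and the backends: the descriptor vectors are already ordered, so a backend can assign bindings in a single pass. A sketch of such a consumer (the running binding counter is hypothetical):

    u32 binding{0};
    for (const auto& desc : info.constant_buffer_descriptors) {
        // desc.index is the guest cbuf slot; desc.count spans indexed arrays.
        binding += desc.count;
    }
    for (const auto& desc : info.storage_buffers_descriptors) {
        // desc.is_written distinguishes read-only from read-write buffers.
        binding += desc.count;
    }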
diff --git a/src/shader_recompiler/stage.h b/src/shader_recompiler/stage.h new file mode 100644 index 000000000..5c1c8d8fc --- /dev/null +++ b/src/shader_recompiler/stage.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Shader { | ||
| 10 | |||
| 11 | enum class Stage : u32 { | ||
| 12 | VertexB, | ||
| 13 | TessellationControl, | ||
| 14 | TessellationEval, | ||
| 15 | Geometry, | ||
| 16 | Fragment, | ||
| 17 | |||
| 18 | Compute, | ||
| 19 | |||
| 20 | VertexA, | ||
| 21 | }; | ||
| 22 | constexpr u32 MaxStageTypes = 6; | ||
| 23 | |||
| 24 | [[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept { | ||
| 25 | return static_cast<Stage>(static_cast<size_t>(Stage::VertexB) + index); | ||
| 26 | } | ||
| 27 | |||
| 28 | } // namespace Shader | ||
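MaxStageTypes deliberately counts only the six regularly bound stages; the trailing VertexA enumerator is kept out of the contiguous range that StageFromIndex maps onto. For example:

    // Index 0 -> VertexB, 1 -> TessellationControl, ..., 4 -> Fragment.
    const Shader::Stage stage{Shader::StageFromIndex(1)};
    // stage == Shader::Stage::TessellationControl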
diff --git a/src/shader_recompiler/varying_state.h b/src/shader_recompiler/varying_state.h new file mode 100644 index 000000000..9d7b24a76 --- /dev/null +++ b/src/shader_recompiler/varying_state.h | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <cstddef> | ||
| 9 | |||
| 10 | #include "shader_recompiler/frontend/ir/attribute.h" | ||
| 11 | |||
| 12 | namespace Shader { | ||
| 13 | |||
| 14 | struct VaryingState { | ||
| 15 | std::bitset<256> mask{}; | ||
| 16 | |||
| 17 | void Set(IR::Attribute attribute, bool state = true) { | ||
| 18 | mask[static_cast<size_t>(attribute)] = state; | ||
| 19 | } | ||
| 20 | |||
| 21 | [[nodiscard]] bool operator[](IR::Attribute attribute) const noexcept { | ||
| 22 | return mask[static_cast<size_t>(attribute)]; | ||
| 23 | } | ||
| 24 | |||
| 25 | [[nodiscard]] bool AnyComponent(IR::Attribute base) const noexcept { | ||
| 26 | return mask[static_cast<size_t>(base) + 0] || mask[static_cast<size_t>(base) + 1] || | ||
| 27 | mask[static_cast<size_t>(base) + 2] || mask[static_cast<size_t>(base) + 3]; | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] bool AllComponents(IR::Attribute base) const noexcept { | ||
| 31 | return mask[static_cast<size_t>(base) + 0] && mask[static_cast<size_t>(base) + 1] && | ||
| 32 | mask[static_cast<size_t>(base) + 2] && mask[static_cast<size_t>(base) + 3]; | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] bool IsUniform(IR::Attribute base) const noexcept { | ||
| 36 | return AnyComponent(base) == AllComponents(base); | ||
| 37 | } | ||
| 38 | |||
| 39 | [[nodiscard]] bool Generic(size_t index, size_t component) const noexcept { | ||
| 40 | return mask[static_cast<size_t>(IR::Attribute::Generic0X) + index * 4 + component]; | ||
| 41 | } | ||
| 42 | |||
| 43 | [[nodiscard]] bool Generic(size_t index) const noexcept { | ||
| 44 | return Generic(index, 0) || Generic(index, 1) || Generic(index, 2) || Generic(index, 3); | ||
| 45 | } | ||
| 46 | |||
| 47 | [[nodiscard]] bool ClipDistances() const noexcept { | ||
| 48 | return AnyComponent(IR::Attribute::ClipDistance0) || | ||
| 49 | AnyComponent(IR::Attribute::ClipDistance4); | ||
| 50 | } | ||
| 51 | |||
| 52 | [[nodiscard]] bool Legacy() const noexcept { | ||
| 53 | return AnyComponent(IR::Attribute::ColorFrontDiffuseR) || | ||
| 54 | AnyComponent(IR::Attribute::ColorFrontSpecularR) || | ||
| 55 | AnyComponent(IR::Attribute::ColorBackDiffuseR) || | ||
| 56 | AnyComponent(IR::Attribute::ColorBackSpecularR) || FixedFunctionTexture(); | ||
| 57 | } | ||
| 58 | |||
| 59 | [[nodiscard]] bool FixedFunctionTexture() const noexcept { | ||
| 60 | for (size_t index = 0; index < 10; ++index) { | ||
| 61 | if (AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 62 | return true; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | return false; | ||
| 66 | } | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace Shader | ||
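VaryingState reduces stage-to-stage attribute matching to bit tests over one 256-entry mask. A usage sketch; Generic0Y is assumed to follow the Generic0X enumerator pattern used above:

    Shader::VaryingState stores{};
    stores.Set(Shader::IR::Attribute::Generic0X);  // component X of generic 0 written
    stores.Set(Shader::IR::Attribute::Generic0Y);

    const bool any{stores.Generic(0)};  // true: some component of generic 0 is set
    const bool uniform{stores.IsUniform(Shader::IR::Attribute::Generic0X)};  // false: X and Y only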