Diffstat (limited to 'src/shader_recompiler/backend/glasm')
24 files changed, 5451 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
new file mode 100644
index 000000000..069c019ad
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -0,0 +1,154 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <string_view>

#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"

namespace Shader::Backend::GLASM {
namespace {
std::string_view InterpDecorator(Interpolation interp) {
    switch (interp) {
    case Interpolation::Smooth:
        return "";
    case Interpolation::Flat:
        return "FLAT ";
    case Interpolation::NoPerspective:
        return "NOPERSPECTIVE ";
    }
    throw InvalidArgument("Invalid interpolation {}", interp);
}

bool IsInputArray(Stage stage) {
    return stage == Stage::Geometry || stage == Stage::TessellationControl ||
           stage == Stage::TessellationEval;
}
} // Anonymous namespace

EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
                         const RuntimeInfo& runtime_info_)
    : info{program.info}, profile{profile_}, runtime_info{runtime_info_} {
    // FIXME: Temporary partial implementation
    u32 cbuf_index{};
    for (const auto& desc : info.constant_buffer_descriptors) {
        if (desc.count != 1) {
            throw NotImplementedException("Constant buffer descriptor array");
        }
        Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
        ++cbuf_index;
    }
    u32 ssbo_index{};
    for (const auto& desc : info.storage_buffers_descriptors) {
        if (desc.count != 1) {
            throw NotImplementedException("Storage buffer descriptor array");
        }
        if (runtime_info.glasm_use_storage_buffers) {
            Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
            ++bindings.storage_buffer;
            ++ssbo_index;
        }
    }
    if (!runtime_info.glasm_use_storage_buffers) {
        if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
            Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
        }
    }
    stage = program.stage;
    switch (program.stage) {
    case Stage::VertexA:
    case Stage::VertexB:
        stage_name = "vertex";
        attrib_name = "vertex";
        break;
    case Stage::TessellationControl:
    case Stage::TessellationEval:
        stage_name = "primitive";
        attrib_name = "primitive";
        break;
    case Stage::Geometry:
        stage_name = "primitive";
        attrib_name = "vertex";
        break;
    case Stage::Fragment:
        stage_name = "fragment";
        attrib_name = "fragment";
        break;
    case Stage::Compute:
        stage_name = "invocation";
        break;
    }
    const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"};
    const VaryingState loads{info.loads.mask | info.passthrough.mask};
    for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
        if (loads.Generic(index)) {
            Add("{}ATTRIB in_attr{}[]={{{}.attrib[{}..{}]}};",
                InterpDecorator(info.interpolation[index]), index, attr_stage, index, index);
        }
    }
    if (IsInputArray(stage) && loads.AnyComponent(IR::Attribute::PositionX)) {
        Add("ATTRIB vertex_position=vertex.position;");
    }
    if (info.uses_invocation_id) {
        Add("ATTRIB primitive_invocation=primitive.invocation;");
    }
    if (info.stores_tess_level_outer) {
        Add("OUTPUT result_patch_tessouter[]={{result.patch.tessouter[0..3]}};");
    }
    if (info.stores_tess_level_inner) {
        Add("OUTPUT result_patch_tessinner[]={{result.patch.tessinner[0..1]}};");
    }
    if (info.stores.ClipDistances()) {
        Add("OUTPUT result_clip[]={{result.clip[0..7]}};");
    }
    for (size_t index = 0; index < info.uses_patches.size(); ++index) {
        if (!info.uses_patches[index]) {
            continue;
        }
        if (stage == Stage::TessellationControl) {
            Add("OUTPUT result_patch_attrib{}[]={{result.patch.attrib[{}..{}]}};"
                "ATTRIB primitive_out_patch_attrib{}[]={{primitive.out.patch.attrib[{}..{}]}};",
                index, index, index, index, index, index);
        } else {
            Add("ATTRIB primitive_patch_attrib{}[]={{primitive.patch.attrib[{}..{}]}};", index,
                index, index);
        }
    }
    if (stage == Stage::Fragment) {
        Add("OUTPUT frag_color0=result.color;");
        for (size_t index = 1; index < info.stores_frag_color.size(); ++index) {
            Add("OUTPUT frag_color{}=result.color[{}];", index, index);
        }
    }
    for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
        if (info.stores.Generic(index)) {
            Add("OUTPUT out_attr{}[]={{result.attrib[{}..{}]}};", index, index, index);
        }
    }
    image_buffer_bindings.reserve(info.image_buffer_descriptors.size());
    for (const auto& desc : info.image_buffer_descriptors) {
        image_buffer_bindings.push_back(bindings.image);
        bindings.image += desc.count;
    }
    image_bindings.reserve(info.image_descriptors.size());
    for (const auto& desc : info.image_descriptors) {
        image_bindings.push_back(bindings.image);
        bindings.image += desc.count;
    }
    texture_buffer_bindings.reserve(info.texture_buffer_descriptors.size());
    for (const auto& desc : info.texture_buffer_descriptors) {
        texture_buffer_bindings.push_back(bindings.texture);
        bindings.texture += desc.count;
    }
    texture_bindings.reserve(info.texture_descriptors.size());
    for (const auto& desc : info.texture_descriptors) {
        texture_bindings.push_back(bindings.texture);
        bindings.texture += desc.count;
    }
}

} // namespace Shader::Backend::GLASM
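
Note: for reference, the declaration block this constructor emits for a hypothetical fragment shader — one constant buffer at index 0, one smoothly interpolated generic input, one color output; the exact text follows the format strings above — would read roughly:

    CBUFFER c0[]={program.buffer[0]};
    ATTRIB in_attr0[]={fragment.attrib[0..0]};
    OUTPUT frag_color0=result.color;
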
diff --git a/src/shader_recompiler/backend/glasm/emit_context.h b/src/shader_recompiler/backend/glasm/emit_context.h
new file mode 100644
index 000000000..8433e5c00
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_context.h
@@ -0,0 +1,80 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <string>
#include <utility>
#include <vector>

#include <fmt/format.h>

#include "shader_recompiler/backend/glasm/reg_alloc.h"
#include "shader_recompiler/stage.h"

namespace Shader {
struct Info;
struct Profile;
struct RuntimeInfo;
} // namespace Shader

namespace Shader::Backend {
struct Bindings;
}

namespace Shader::IR {
class Inst;
struct Program;
} // namespace Shader::IR

namespace Shader::Backend::GLASM {

class EmitContext {
public:
    explicit EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,
                         const RuntimeInfo& runtime_info_);

    template <typename... Args>
    void Add(const char* format_str, IR::Inst& inst, Args&&... args) {
        code += fmt::format(fmt::runtime(format_str), reg_alloc.Define(inst),
                            std::forward<Args>(args)...);
        // TODO: Remove this
        code += '\n';
    }

    template <typename... Args>
    void LongAdd(const char* format_str, IR::Inst& inst, Args&&... args) {
        code += fmt::format(fmt::runtime(format_str), reg_alloc.LongDefine(inst),
                            std::forward<Args>(args)...);
        // TODO: Remove this
        code += '\n';
    }

    template <typename... Args>
    void Add(const char* format_str, Args&&... args) {
        code += fmt::format(fmt::runtime(format_str), std::forward<Args>(args)...);
        // TODO: Remove this
        code += '\n';
    }

    std::string code;
    RegAlloc reg_alloc{};
    const Info& info;
    const Profile& profile;
    const RuntimeInfo& runtime_info;

    std::vector<u32> texture_buffer_bindings;
    std::vector<u32> image_buffer_bindings;
    std::vector<u32> texture_bindings;
    std::vector<u32> image_bindings;

    Stage stage{};
    std::string_view stage_name = "invalid";
    std::string_view attrib_name = "invalid";

    u32 num_safety_loop_vars{};
    bool uses_y_direction{};
};

} // namespace Shader::Backend::GLASM
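
Note: a minimal caller sketch (hypothetical emitter, not part of this commit): the Add overload taking an IR::Inst& feeds the register defined for that instruction into the first "{}" placeholder, so a typical instruction emitter is a one-liner:

    // Hypothetical sketch: "{}" receives the register freshly defined for
    // inst; a and b fill the remaining placeholders.
    void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) {
        ctx.Add("ADD.S {}.x,{},{};", inst, a, b);
    }
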
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
new file mode 100644
index 000000000..a5e8c9b6e
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp
@@ -0,0 +1,492 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <string>
#include <tuple>

#include "common/div_ceil.h"
#include "common/settings.h"
#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"

namespace Shader::Backend::GLASM {
namespace {
template <class Func>
struct FuncTraits {};

template <class ReturnType_, class... Args>
struct FuncTraits<ReturnType_ (*)(Args...)> {
    using ReturnType = ReturnType_;

    static constexpr size_t NUM_ARGS = sizeof...(Args);

    template <size_t I>
    using ArgType = std::tuple_element_t<I, std::tuple<Args...>>;
};

template <typename T>
struct Identity {
    Identity(T data_) : data{data_} {}

    T Extract() {
        return data;
    }

    T data;
};

template <bool scalar>
class RegWrapper {
public:
    RegWrapper(EmitContext& ctx, const IR::Value& ir_value) : reg_alloc{ctx.reg_alloc} {
        const Value value{reg_alloc.Peek(ir_value)};
        if (value.type == Type::Register) {
            inst = ir_value.InstRecursive();
            reg = Register{value};
        } else {
            reg = value.type == Type::U64 ? reg_alloc.AllocLongReg() : reg_alloc.AllocReg();
        }
        switch (value.type) {
        case Type::Register:
        case Type::Void:
            break;
        case Type::U32:
            ctx.Add("MOV.U {}.x,{};", reg, value.imm_u32);
            break;
        case Type::U64:
            ctx.Add("MOV.U64 {}.x,{};", reg, value.imm_u64);
            break;
        }
    }

    auto Extract() {
        if (inst) {
            reg_alloc.Unref(*inst);
        } else {
            reg_alloc.FreeReg(reg);
        }
        return std::conditional_t<scalar, ScalarRegister, Register>{Value{reg}};
    }

private:
    RegAlloc& reg_alloc;
    IR::Inst* inst{};
    Register reg{};
};

template <typename ArgType>
class ValueWrapper {
public:
    ValueWrapper(EmitContext& ctx, const IR::Value& ir_value_)
        : reg_alloc{ctx.reg_alloc}, ir_value{ir_value_}, value{reg_alloc.Peek(ir_value)} {}

    ArgType Extract() {
        if (!ir_value.IsImmediate()) {
            reg_alloc.Unref(*ir_value.InstRecursive());
        }
        return value;
    }

private:
    RegAlloc& reg_alloc;
    const IR::Value& ir_value;
    ArgType value;
};

template <typename ArgType>
auto Arg(EmitContext& ctx, const IR::Value& arg) {
    if constexpr (std::is_same_v<ArgType, Register>) {
        return RegWrapper<false>{ctx, arg};
    } else if constexpr (std::is_same_v<ArgType, ScalarRegister>) {
        return RegWrapper<true>{ctx, arg};
    } else if constexpr (std::is_base_of_v<Value, ArgType>) {
        return ValueWrapper<ArgType>{ctx, arg};
    } else if constexpr (std::is_same_v<ArgType, const IR::Value&>) {
        return Identity<const IR::Value&>{arg};
    } else if constexpr (std::is_same_v<ArgType, u32>) {
        return Identity{arg.U32()};
    } else if constexpr (std::is_same_v<ArgType, IR::Attribute>) {
        return Identity{arg.Attribute()};
    } else if constexpr (std::is_same_v<ArgType, IR::Patch>) {
        return Identity{arg.Patch()};
    } else if constexpr (std::is_same_v<ArgType, IR::Reg>) {
        return Identity{arg.Reg()};
    }
}

template <auto func, bool is_first_arg_inst>
struct InvokeCall {
    template <typename... Args>
    InvokeCall(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
        if constexpr (is_first_arg_inst) {
            func(ctx, *inst, args.Extract()...);
        } else {
            func(ctx, args.Extract()...);
        }
    }
};

template <auto func, bool is_first_arg_inst, size_t... I>
void Invoke(EmitContext& ctx, IR::Inst* inst, std::index_sequence<I...>) {
    using Traits = FuncTraits<decltype(func)>;
    if constexpr (is_first_arg_inst) {
        InvokeCall<func, is_first_arg_inst>{
            ctx, inst, Arg<typename Traits::template ArgType<I + 2>>(ctx, inst->Arg(I))...};
    } else {
        InvokeCall<func, is_first_arg_inst>{
            ctx, inst, Arg<typename Traits::template ArgType<I + 1>>(ctx, inst->Arg(I))...};
    }
}

template <auto func>
void Invoke(EmitContext& ctx, IR::Inst* inst) {
    using Traits = FuncTraits<decltype(func)>;
    static_assert(Traits::NUM_ARGS >= 1, "Insufficient arguments");
    if constexpr (Traits::NUM_ARGS == 1) {
        Invoke<func, false>(ctx, inst, std::make_index_sequence<0>{});
    } else {
        using FirstArgType = typename Traits::template ArgType<1>;
        static constexpr bool is_first_arg_inst = std::is_same_v<FirstArgType, IR::Inst&>;
        using Indices = std::make_index_sequence<Traits::NUM_ARGS - (is_first_arg_inst ? 2 : 1)>;
        Invoke<func, is_first_arg_inst>(ctx, inst, Indices{});
    }
}

void EmitInst(EmitContext& ctx, IR::Inst* inst) {
    switch (inst->GetOpcode()) {
#define OPCODE(name, result_type, ...)                                                            \
    case IR::Opcode::name:                                                                         \
        return Invoke<&Emit##name>(ctx, inst);
#include "shader_recompiler/frontend/ir/opcodes.inc"
#undef OPCODE
    }
    throw LogicError("Invalid opcode {}", inst->GetOpcode());
}

bool IsReference(IR::Inst& inst) {
    return inst.GetOpcode() == IR::Opcode::Reference;
}

void PrecolorInst(IR::Inst& phi) {
    // Insert phi moves before references to avoid overwriting other phis
    const size_t num_args{phi.NumArgs()};
    for (size_t i = 0; i < num_args; ++i) {
        IR::Block& phi_block{*phi.PhiBlock(i)};
        auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()};
        IR::IREmitter ir{phi_block, it};
        const IR::Value arg{phi.Arg(i)};
        if (arg.IsImmediate()) {
            ir.PhiMove(phi, arg);
        } else {
            ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())});
        }
    }
    for (size_t i = 0; i < num_args; ++i) {
        IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi});
    }
}

void Precolor(const IR::Program& program) {
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& phi : block->Instructions()) {
            if (!IR::IsPhi(phi)) {
                break;
            }
            PrecolorInst(phi);
        }
    }
}

void EmitCode(EmitContext& ctx, const IR::Program& program) {
    const auto eval{
        [&](const IR::U1& cond) { return ScalarS32{ctx.reg_alloc.Consume(IR::Value{cond})}; }};
    for (const IR::AbstractSyntaxNode& node : program.syntax_list) {
        switch (node.type) {
        case IR::AbstractSyntaxNode::Type::Block:
            for (IR::Inst& inst : node.data.block->Instructions()) {
                EmitInst(ctx, &inst);
            }
            break;
        case IR::AbstractSyntaxNode::Type::If:
            ctx.Add("MOV.S.CC RC,{};"
                    "IF NE.x;",
                    eval(node.data.if_node.cond));
            break;
        case IR::AbstractSyntaxNode::Type::EndIf:
            ctx.Add("ENDIF;");
            break;
        case IR::AbstractSyntaxNode::Type::Loop:
            ctx.Add("REP;");
            break;
        case IR::AbstractSyntaxNode::Type::Repeat:
            if (!Settings::values.disable_shader_loop_safety_checks) {
                const u32 loop_index{ctx.num_safety_loop_vars++};
                const u32 vector_index{loop_index / 4};
                const char component{"xyzw"[loop_index % 4]};
                ctx.Add("SUB.S.CC loop{}.{},loop{}.{},1;"
                        "BRK(LT.{});",
                        vector_index, component, vector_index, component, component);
            }
            if (node.data.repeat.cond.IsImmediate()) {
                if (node.data.repeat.cond.U1()) {
                    ctx.Add("ENDREP;");
                } else {
                    ctx.Add("BRK;"
                            "ENDREP;");
                }
            } else {
                ctx.Add("MOV.S.CC RC,{};"
                        "BRK(EQ.x);"
                        "ENDREP;",
                        eval(node.data.repeat.cond));
            }
            break;
        case IR::AbstractSyntaxNode::Type::Break:
            if (node.data.break_node.cond.IsImmediate()) {
                if (node.data.break_node.cond.U1()) {
                    ctx.Add("BRK;");
                }
            } else {
                ctx.Add("MOV.S.CC RC,{};"
                        "BRK (NE.x);",
                        eval(node.data.break_node.cond));
            }
            break;
        case IR::AbstractSyntaxNode::Type::Return:
        case IR::AbstractSyntaxNode::Type::Unreachable:
            ctx.Add("RET;");
            break;
        }
    }
    if (!ctx.reg_alloc.IsEmpty()) {
        LOG_WARNING(Shader_GLASM, "Register leak after generating code");
    }
}

void SetupOptions(const IR::Program& program, const Profile& profile,
                  const RuntimeInfo& runtime_info, std::string& header) {
    const Info& info{program.info};
    const Stage stage{program.stage};

    // TODO: Track the shared atomic ops
    header += "OPTION NV_internal;"
              "OPTION NV_shader_storage_buffer;"
              "OPTION NV_gpu_program_fp64;";
    if (info.uses_int64_bit_atomics) {
        header += "OPTION NV_shader_atomic_int64;";
    }
    if (info.uses_atomic_f32_add) {
        header += "OPTION NV_shader_atomic_float;";
    }
    if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) {
        header += "OPTION NV_shader_atomic_fp16_vector;";
    }
    if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote ||
        info.uses_fswzadd) {
        header += "OPTION NV_shader_thread_group;";
    }
    if (info.uses_subgroup_shuffles) {
        header += "OPTION NV_shader_thread_shuffle;";
    }
    if (info.uses_sparse_residency) {
        header += "OPTION EXT_sparse_texture2;";
    }
    const bool stores_viewport_layer{info.stores[IR::Attribute::ViewportIndex] ||
                                     info.stores[IR::Attribute::Layer]};
    if ((stage != Stage::Geometry && stores_viewport_layer) ||
        info.stores[IR::Attribute::ViewportMask]) {
        if (profile.support_viewport_index_layer_non_geometry) {
            header += "OPTION NV_viewport_array2;";
        }
    }
    if (program.is_geometry_passthrough && profile.support_geometry_shader_passthrough) {
        header += "OPTION NV_geometry_shader_passthrough;";
    }
    if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
        header += "OPTION EXT_shader_image_load_formatted;";
    }
    if (profile.support_derivative_control) {
        header += "OPTION ARB_derivative_control;";
    }
    if (stage == Stage::Fragment && runtime_info.force_early_z != 0) {
        header += "OPTION NV_early_fragment_tests;";
    }
    if (stage == Stage::Fragment) {
        header += "OPTION ARB_draw_buffers;";
    }
}

std::string_view StageHeader(Stage stage) {
    switch (stage) {
    case Stage::VertexA:
    case Stage::VertexB:
        return "!!NVvp5.0\n";
    case Stage::TessellationControl:
        return "!!NVtcp5.0\n";
    case Stage::TessellationEval:
        return "!!NVtep5.0\n";
    case Stage::Geometry:
        return "!!NVgp5.0\n";
    case Stage::Fragment:
        return "!!NVfp5.0\n";
    case Stage::Compute:
        return "!!NVcp5.0\n";
    }
    throw InvalidArgument("Invalid stage {}", stage);
}

std::string_view InputPrimitive(InputTopology topology) {
    switch (topology) {
    case InputTopology::Points:
        return "POINTS";
    case InputTopology::Lines:
        return "LINES";
    case InputTopology::LinesAdjacency:
| 353 | return "LINESS_ADJACENCY"; | ||
    case InputTopology::Triangles:
        return "TRIANGLES";
    case InputTopology::TrianglesAdjacency:
        return "TRIANGLES_ADJACENCY";
    }
    throw InvalidArgument("Invalid input topology {}", topology);
}

std::string_view OutputPrimitive(OutputTopology topology) {
    switch (topology) {
    case OutputTopology::PointList:
        return "POINTS";
    case OutputTopology::LineStrip:
        return "LINE_STRIP";
    case OutputTopology::TriangleStrip:
        return "TRIANGLE_STRIP";
    }
    throw InvalidArgument("Invalid output topology {}", topology);
}

std::string_view GetTessMode(TessPrimitive primitive) {
    switch (primitive) {
    case TessPrimitive::Triangles:
        return "TRIANGLES";
    case TessPrimitive::Quads:
        return "QUADS";
    case TessPrimitive::Isolines:
        return "ISOLINES";
    }
    throw InvalidArgument("Invalid tessellation primitive {}", primitive);
}

std::string_view GetTessSpacing(TessSpacing spacing) {
    switch (spacing) {
    case TessSpacing::Equal:
        return "EQUAL";
    case TessSpacing::FractionalOdd:
        return "FRACTIONAL_ODD";
    case TessSpacing::FractionalEven:
        return "FRACTIONAL_EVEN";
    }
    throw InvalidArgument("Invalid tessellation spacing {}", spacing);
}
} // Anonymous namespace

std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, IR::Program& program,
                      Bindings& bindings) {
    EmitContext ctx{program, bindings, profile, runtime_info};
    Precolor(program);
    EmitCode(ctx, program);
    std::string header{StageHeader(program.stage)};
    SetupOptions(program, profile, runtime_info, header);
    switch (program.stage) {
    case Stage::TessellationControl:
        header += fmt::format("VERTICES_OUT {};", program.invocations);
        break;
    case Stage::TessellationEval:
        header += fmt::format("TESS_MODE {};"
                              "TESS_SPACING {};"
                              "TESS_VERTEX_ORDER {};",
                              GetTessMode(runtime_info.tess_primitive),
                              GetTessSpacing(runtime_info.tess_spacing),
                              runtime_info.tess_clockwise ? "CW" : "CCW");
        break;
    case Stage::Geometry:
        header += fmt::format("PRIMITIVE_IN {};", InputPrimitive(runtime_info.input_topology));
        if (program.is_geometry_passthrough) {
            if (profile.support_geometry_shader_passthrough) {
                for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
                    if (program.info.passthrough.Generic(index)) {
                        header += fmt::format("PASSTHROUGH result.attrib[{}];", index);
                    }
                }
                if (program.info.passthrough.AnyComponent(IR::Attribute::PositionX)) {
                    header += "PASSTHROUGH result.position;";
                }
            } else {
                LOG_WARNING(Shader_GLASM, "Passthrough geometry program used but not supported");
            }
        } else {
            header +=
                fmt::format("VERTICES_OUT {};"
                            "PRIMITIVE_OUT {};",
                            program.output_vertices, OutputPrimitive(program.output_topology));
        }
        break;
    case Stage::Compute:
        header += fmt::format("GROUP_SIZE {} {} {};", program.workgroup_size[0],
                              program.workgroup_size[1], program.workgroup_size[2]);
        break;
    default:
        break;
    }
    if (program.shared_memory_size > 0) {
        header += fmt::format("SHARED_MEMORY {};", program.shared_memory_size);
        header += fmt::format("SHARED shared_mem[]={{program.sharedmem}};");
    }
    header += "TEMP ";
    for (size_t index = 0; index < ctx.reg_alloc.NumUsedRegisters(); ++index) {
        header += fmt::format("R{},", index);
    }
    if (program.local_memory_size > 0) {
        header += fmt::format("lmem[{}],", program.local_memory_size);
    }
    if (program.info.uses_fswzadd) {
        header += "FSWZA[4],FSWZB[4],";
    }
    const u32 num_safety_loop_vectors{Common::DivCeil(ctx.num_safety_loop_vars, 4u)};
    for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
        header += fmt::format("loop{},", index);
    }
    header += "RC;"
              "LONG TEMP ";
    for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) {
        header += fmt::format("D{},", index);
    }
    header += "DC;";
    if (program.info.uses_fswzadd) {
        header += "MOV.F FSWZA[0],-1;"
                  "MOV.F FSWZA[1],1;"
                  "MOV.F FSWZA[2],-1;"
                  "MOV.F FSWZA[3],0;"
                  "MOV.F FSWZB[0],-1;"
                  "MOV.F FSWZB[1],-1;"
                  "MOV.F FSWZB[2],1;"
                  "MOV.F FSWZB[3],-1;";
    }
    for (u32 index = 0; index < num_safety_loop_vectors; ++index) {
        header += fmt::format("MOV.S loop{},{{0x2000,0x2000,0x2000,0x2000}};", index);
    }
    if (ctx.uses_y_direction) {
        header += "PARAM y_direction[1]={state.material.front.ambient};";
    }
    ctx.code.insert(0, header);
    ctx.code += "END";
    return ctx.code;
}

} // namespace Shader::Backend::GLASM
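
Note: put together, the assembled program for a trivial compute shader would look roughly like this — a sketch assuming an 8x8x1 workgroup, two used 32-bit registers, no long registers, and loop safety checks disabled; ordering follows StageHeader, SetupOptions, the stage switch, and the TEMP declarations above:

    !!NVcp5.0
    OPTION NV_internal;
    OPTION NV_shader_storage_buffer;
    OPTION NV_gpu_program_fp64;
    GROUP_SIZE 8 8 1;
    TEMP R0,R1,RC;
    LONG TEMP DC;
    ...emitted instructions...
    END
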
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
new file mode 100644
index 000000000..bcb55f062
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -0,0 +1,25 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <string>

#include "shader_recompiler/backend/bindings.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"

namespace Shader::Backend::GLASM {

[[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
                                    IR::Program& program, Bindings& bindings);

[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
                                           IR::Program& program) {
    Bindings binding;
    return EmitGLASM(profile, runtime_info, program, binding);
}

} // namespace Shader::Backend::GLASM
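
Note: a hypothetical call site, assuming a filled-in profile, runtime_info, and translated IR program; the inline overload above default-constructs the Bindings state when the caller has none to share across stages:

    const std::string code{Shader::Backend::GLASM::EmitGLASM(profile, runtime_info, program)};
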
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_barriers.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
new file mode 100644
index 000000000..9201ccd39
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp
@@ -0,0 +1,91 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::Backend::GLASM {

static void Alias(IR::Inst& inst, const IR::Value& value) {
    if (value.IsImmediate()) {
        return;
    }
    IR::Inst& value_inst{RegAlloc::AliasInst(*value.Inst())};
    value_inst.DestructiveAddUsage(inst.UseCount());
    value_inst.DestructiveRemoveUsage();
    inst.SetDefinition(value_inst.Definition<Id>());
}

void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) {
    // Fake one usage to get a real register out of the condition
    inst.DestructiveAddUsage(1);
    const Register ret{ctx.reg_alloc.Define(inst)};
    const ScalarS32 input{ctx.reg_alloc.Consume(value)};
    if (ret != input) {
        ctx.Add("MOV.S {},{};", ret, input);
    }
}

void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitBitCastU32F32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitBitCastU64F64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitBitCastF16U16(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitBitCastF32U32(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitBitCastF64U64(EmitContext&, IR::Inst& inst, const IR::Value& value) {
    Alias(inst, value);
}

void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.LongAdd("PK64.U {}.x,{};", inst, value);
}

void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.Add("UP64.U {}.xy,{}.x;", inst, value);
}

void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
    throw NotImplementedException("GLASM instruction");
}

void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) {
    throw NotImplementedException("GLASM instruction");
}

void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.Add("PK2H {}.x,{};", inst, value);
}

void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.Add("UP2H {}.xy,{}.x;", inst, value);
}

void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.LongAdd("PK64 {}.x,{};", inst, value);
}

void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value) {
    ctx.Add("UP64 {}.xy,{}.x;", inst, value);
}

} // namespace Shader::Backend::GLASM
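
Note: as a concrete sketch of the pack/unpack pairs above (register numbers assumed): packing a uvec2 held in R1 into the long register defined for the instruction, then unpacking it into another vector register, would emit:

    PK64.U D0.x,R1;
    UP64.U R2.xy,D0.x;
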
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
new file mode 100644
index 000000000..bff0b7c1c
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp
@@ -0,0 +1,244 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::Backend::GLASM {
namespace {
template <auto read_imm, char type, typename... Values>
void CompositeConstruct(EmitContext& ctx, IR::Inst& inst, Values&&... elements) {
    const Register ret{ctx.reg_alloc.Define(inst)};
    if (std::ranges::any_of(std::array{elements...},
                            [](const IR::Value& value) { return value.IsImmediate(); })) {
        using Type = std::invoke_result_t<decltype(read_imm), IR::Value>;
        const std::array<Type, 4> values{(elements.IsImmediate() ? (elements.*read_imm)() : 0)...};
        ctx.Add("MOV.{} {},{{{},{},{},{}}};", type, ret, fmt::to_string(values[0]),
                fmt::to_string(values[1]), fmt::to_string(values[2]), fmt::to_string(values[3]));
    }
    size_t index{};
    for (const IR::Value& element : {elements...}) {
        if (!element.IsImmediate()) {
            const ScalarU32 value{ctx.reg_alloc.Consume(element)};
            ctx.Add("MOV.{} {}.{},{};", type, ret, "xyzw"[index], value);
        }
        ++index;
    }
}

void CompositeExtract(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index, char type) {
    const Register ret{ctx.reg_alloc.Define(inst)};
    if (ret == composite && index == 0) {
        // No need to do anything here, the source and destination are the same register
        return;
    }
    ctx.Add("MOV.{} {}.x,{}.{};", type, ret, composite, "xyzw"[index]);
}
template <typename ObjectType>
void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, ObjectType object,
                     u32 index, char type) {
    const Register ret{ctx.reg_alloc.Define(inst)};
    const char swizzle{"xyzw"[index]};
    if (ret != composite && ret == object) {
        // The object is aliased with the return value, so we have to use a temporary to insert
        ctx.Add("MOV.{} RC,{};"
                "MOV.{} RC.{},{};"
                "MOV.{} {},RC;",
                type, composite, type, swizzle, object, type, ret);
    } else if (ret != composite) {
        // The input composite is not aliased with the return value, so we have to copy it
        // beforehand. The insert object is not aliased with the return value either, so we
        // don't have to worry about that
        ctx.Add("MOV.{} {},{};"
                "MOV.{} {}.{},{};",
                type, ret, composite, type, ret, swizzle, object);
    } else {
        // The return value is aliased with the composite, so we can just insert the object;
        // it doesn't matter if the object is aliased too
        ctx.Add("MOV.{} {}.{},{};", type, ret, swizzle, object);
    }
}
} // Anonymous namespace

void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2) {
    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2);
}

void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2, const IR::Value& e3) {
    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3);
}

void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
    CompositeConstruct<&IR::Value::U32, 'U'>(ctx, inst, e1, e2, e3, e4);
}

void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'U');
}

void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'U');
}

void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'U');
}

void EmitCompositeInsertU32x2([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite,
                              [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertU32x3([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite,
                              [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertU32x4([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite,
                              [[maybe_unused]] ScalarU32 object, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
                                 [[maybe_unused]] Register e2) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF16x3([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
                                 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF16x4([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register e1,
                                 [[maybe_unused]] Register e2, [[maybe_unused]] Register e3,
                                 [[maybe_unused]] Register e4) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF16x2([[maybe_unused]] EmitContext& ctx,
                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF16x3([[maybe_unused]] EmitContext& ctx,
                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF16x4([[maybe_unused]] EmitContext& ctx,
                               [[maybe_unused]] Register composite, [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF16x2([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF16x3([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF16x4([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2) {
    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2);
}

void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2, const IR::Value& e3) {
    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3);
}

void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1,
                                 const IR::Value& e2, const IR::Value& e3, const IR::Value& e4) {
    CompositeConstruct<&IR::Value::F32, 'F'>(ctx, inst, e1, e2, e3, e4);
}

void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'F');
}

void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'F');
}

void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index) {
    CompositeExtract(ctx, inst, composite, index, 'F');
}

void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite,
                              ScalarF32 object, u32 index) {
    CompositeInsert(ctx, inst, composite, object, index, 'F');
}

void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite,
                              ScalarF32 object, u32 index) {
    CompositeInsert(ctx, inst, composite, object, index, 'F');
}

void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite,
                              ScalarF32 object, u32 index) {
    CompositeInsert(ctx, inst, composite, object, index, 'F');
}

void EmitCompositeConstructF64x2([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF64x3([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeConstructF64x4([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF64x2([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF64x3([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeExtractF64x4([[maybe_unused]] EmitContext& ctx) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF64x2([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF64x3([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

void EmitCompositeInsertF64x4([[maybe_unused]] EmitContext& ctx,
                              [[maybe_unused]] Register composite, [[maybe_unused]] Register object,
                              [[maybe_unused]] u32 index) {
    throw NotImplementedException("GLASM instruction");
}

} // namespace Shader::Backend::GLASM
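
Note: for example, constructing a vec2 from an immediate 1.0 and a register value follows the two-step pattern in CompositeConstruct: one vector MOV covering the immediate lanes (zero-filled elsewhere), then a per-component MOV for each register element. A sketch, with R0 and R1 as assumed allocations:

    MOV.F R0,{1,0,0,0};
    MOV.F R0.y,R1;
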
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
new file mode 100644
index 000000000..02c9dc6d7
--- /dev/null
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
@@ -0,0 +1,346 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <string_view>

#include "shader_recompiler/backend/glasm/emit_context.h"
#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/shader_info.h"

namespace Shader::Backend::GLASM {
namespace {
void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
             std::string_view size) {
    if (!binding.IsImmediate()) {
        throw NotImplementedException("Indirect constant buffer loading");
    }
    const Register ret{ctx.reg_alloc.Define(inst)};
    if (offset.type == Type::U32) {
        // Avoid reading arrays out of bounds, matching hardware's behavior
        if (offset.imm_u32 >= 0x10'000) {
            ctx.Add("MOV.S {},0;", ret);
            return;
        }
    }
    ctx.Add("LDC.{} {},c{}[{}];", size, ret, binding.U32(), offset);
}

bool IsInputArray(Stage stage) {
    return stage == Stage::Geometry || stage == Stage::TessellationControl ||
           stage == Stage::TessellationEval;
}

std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
    return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
}

u32 TexCoordIndex(IR::Attribute attr) {
    return (static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4;
}
} // Anonymous namespace

void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "U8");
}

void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "S8");
}

void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "U16");
}

void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "S16");
}

void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "U32");
}

void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "F32");
}

void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
                      ScalarU32 offset) {
    GetCbuf(ctx, inst, binding, offset, "U32X2");
}

void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex) {
    const u32 element{static_cast<u32>(attr) % 4};
    const char swizzle{"xyzw"[element]};
    if (IR::IsGeneric(attr)) {
        const u32 index{IR::GenericAttributeIndex(attr)};
        ctx.Add("MOV.F {}.x,in_attr{}{}[0].{};", inst, index, VertexIndex(ctx, vertex), swizzle);
        return;
    }
    if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
        const u32 index{TexCoordIndex(attr)};
        ctx.Add("MOV.F {}.x,{}.texcoord[{}].{};", inst, ctx.attrib_name, index, swizzle);
        return;
    }
    switch (attr) {
    case IR::Attribute::PrimitiveId:
        ctx.Add("MOV.S {}.x,primitive.id;", inst);
        break;
    case IR::Attribute::PositionX:
    case IR::Attribute::PositionY:
    case IR::Attribute::PositionZ:
    case IR::Attribute::PositionW:
        if (IsInputArray(ctx.stage)) {
            ctx.Add("MOV.F {}.x,vertex_position{}.{};", inst, VertexIndex(ctx, vertex), swizzle);
        } else {
            ctx.Add("MOV.F {}.x,{}.position.{};", inst, ctx.attrib_name, swizzle);
        }
        break;
    case IR::Attribute::ColorFrontDiffuseR:
    case IR::Attribute::ColorFrontDiffuseG:
    case IR::Attribute::ColorFrontDiffuseB:
    case IR::Attribute::ColorFrontDiffuseA:
        ctx.Add("MOV.F {}.x,{}.color.{};", inst, ctx.attrib_name, swizzle);
        break;
    case IR::Attribute::PointSpriteS:
    case IR::Attribute::PointSpriteT:
        ctx.Add("MOV.F {}.x,{}.pointcoord.{};", inst, ctx.attrib_name, swizzle);
        break;
    case IR::Attribute::TessellationEvaluationPointU:
    case IR::Attribute::TessellationEvaluationPointV:
        ctx.Add("MOV.F {}.x,vertex.tesscoord.{};", inst, swizzle);
        break;
    case IR::Attribute::InstanceId:
        ctx.Add("MOV.S {}.x,{}.instance;", inst, ctx.attrib_name);
        break;
    case IR::Attribute::VertexId:
        ctx.Add("MOV.S {}.x,{}.id;", inst, ctx.attrib_name);
        break;
    case IR::Attribute::FrontFace:
        ctx.Add("CMP.S {}.x,{}.facing.x,0,-1;", inst, ctx.attrib_name);
        break;
    default:
        throw NotImplementedException("Get attribute {}", attr);
    }
}

void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value,
                      [[maybe_unused]] ScalarU32 vertex) {
    const u32 element{static_cast<u32>(attr) % 4};
    const char swizzle{"xyzw"[element]};
    if (IR::IsGeneric(attr)) {
        const u32 index{IR::GenericAttributeIndex(attr)};
        ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value);
        return;
    }
    if (attr >= IR::Attribute::FixedFncTexture0S && attr <= IR::Attribute::FixedFncTexture9Q) {
| 139 | const u32 index{TexCoordIndex(attr)}; | ||
| 140 | ctx.Add("MOV.F result.texcoord[{}].{},{};", index, swizzle, value); | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | switch (attr) { | ||
| 144 | case IR::Attribute::Layer: | ||
| 145 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 146 | ctx.Add("MOV.F result.layer.x,{};", value); | ||
| 147 | } else { | ||
| 148 | LOG_WARNING(Shader_GLASM, | ||
| 149 | "Layer stored outside of geometry shader not supported by device"); | ||
| 150 | } | ||
| 151 | break; | ||
| 152 | case IR::Attribute::ViewportIndex: | ||
| 153 | if (ctx.stage == Stage::Geometry || ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 154 | ctx.Add("MOV.F result.viewport.x,{};", value); | ||
| 155 | } else { | ||
| 156 | LOG_WARNING(Shader_GLASM, | ||
| 157 | "Viewport stored outside of geometry shader not supported by device"); | ||
| 158 | } | ||
| 159 | break; | ||
| 160 | case IR::Attribute::ViewportMask: | ||
| 161 | // NV_viewport_array2 is required to access result.viewportmask, regardless of shader stage. | ||
| 162 | if (ctx.profile.support_viewport_index_layer_non_geometry) { | ||
| 163 | ctx.Add("MOV.F result.viewportmask[0].x,{};", value); | ||
| 164 | } else { | ||
| 165 | LOG_WARNING(Shader_GLASM, "Device does not support storing to ViewportMask"); | ||
| 166 | } | ||
| 167 | break; | ||
| 168 | case IR::Attribute::PointSize: | ||
| 169 | ctx.Add("MOV.F result.pointsize.x,{};", value); | ||
| 170 | break; | ||
| 171 | case IR::Attribute::PositionX: | ||
| 172 | case IR::Attribute::PositionY: | ||
| 173 | case IR::Attribute::PositionZ: | ||
| 174 | case IR::Attribute::PositionW: | ||
| 175 | ctx.Add("MOV.F result.position.{},{};", swizzle, value); | ||
| 176 | break; | ||
| 177 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 178 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 179 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 180 | case IR::Attribute::ColorFrontDiffuseA: | ||
| 181 | ctx.Add("MOV.F result.color.{},{};", swizzle, value); | ||
| 182 | break; | ||
| 183 | case IR::Attribute::ColorFrontSpecularR: | ||
| 184 | case IR::Attribute::ColorFrontSpecularG: | ||
| 185 | case IR::Attribute::ColorFrontSpecularB: | ||
| 186 | case IR::Attribute::ColorFrontSpecularA: | ||
| 187 | ctx.Add("MOV.F result.color.secondary.{},{};", swizzle, value); | ||
| 188 | break; | ||
| 189 | case IR::Attribute::ColorBackDiffuseR: | ||
| 190 | case IR::Attribute::ColorBackDiffuseG: | ||
| 191 | case IR::Attribute::ColorBackDiffuseB: | ||
| 192 | case IR::Attribute::ColorBackDiffuseA: | ||
| 193 | ctx.Add("MOV.F result.color.back.{},{};", swizzle, value); | ||
| 194 | break; | ||
| 195 | case IR::Attribute::ColorBackSpecularR: | ||
| 196 | case IR::Attribute::ColorBackSpecularG: | ||
| 197 | case IR::Attribute::ColorBackSpecularB: | ||
| 198 | case IR::Attribute::ColorBackSpecularA: | ||
| 199 | ctx.Add("MOV.F result.color.back.secondary.{},{};", swizzle, value); | ||
| 200 | break; | ||
| 201 | case IR::Attribute::FogCoordinate: | ||
| 202 | ctx.Add("MOV.F result.fogcoord.x,{};", value); | ||
| 203 | break; | ||
| 204 | case IR::Attribute::ClipDistance0: | ||
| 205 | case IR::Attribute::ClipDistance1: | ||
| 206 | case IR::Attribute::ClipDistance2: | ||
| 207 | case IR::Attribute::ClipDistance3: | ||
| 208 | case IR::Attribute::ClipDistance4: | ||
| 209 | case IR::Attribute::ClipDistance5: | ||
| 210 | case IR::Attribute::ClipDistance6: | ||
| 211 | case IR::Attribute::ClipDistance7: { | ||
| 212 | const u32 index{static_cast<u32>(attr) - static_cast<u32>(IR::Attribute::ClipDistance0)}; | ||
| 213 | ctx.Add("MOV.F result.clip[{}].x,{};", index, value); | ||
| 214 | break; | ||
| 215 | } | ||
| 216 | default: | ||
| 217 | throw NotImplementedException("Set attribute {}", attr); | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
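A quick illustration of the emission pattern used throughout this file: each case expands an fmt format string into a single line of GLASM text. Below is a minimal standalone sketch of that expansion, assuming only the fmt library; the attribute slot, element, and source operand R0.x are hypothetical placeholders for values RegAlloc would supply.

    #include <fmt/format.h>

    int main() {
        const unsigned index = 2;        // hypothetical generic attribute slot
        const char swizzle = "xyzw"[3];  // element 3 selects the .w component
        // Mirrors ctx.Add("MOV.F out_attr{}[0].{},{};", index, swizzle, value):
        fmt::print("{}\n", fmt::format("MOV.F out_attr{}[0].{},{};", index, swizzle, "R0.x"));
        // Prints: MOV.F out_attr2[0].w,R0.x;
    }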
| 221 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex) { | ||
| 222 | // RC.x = base_index | ||
| 223 | // RC.y = masked_index | ||
| 224 | // RC.z = compare_index | ||
| 225 | ctx.Add("SHR.S RC.x,{},2;" | ||
| 226 | "AND.S RC.y,RC.x,3;" | ||
| 227 | "SHR.S RC.z,{},4;", | ||
| 228 | offset, offset); | ||
| 229 | |||
| 230 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 231 | u32 num_endifs{}; | ||
| 232 | const auto read{[&](u32 compare_index, const std::array<std::string, 4>& values) { | ||
| 233 | ++num_endifs; | ||
| 234 | ctx.Add("SEQ.S.CC RC.w,RC.z,{};" // compare_index | ||
| 235 | "IF NE.w;" | ||
| 236 | // X | ||
| 237 | "SEQ.S.CC RC.w,RC.y,0;" | ||
| 238 | "IF NE.w;" | ||
| 239 | "MOV {}.x,{};" | ||
| 240 | "ELSE;" | ||
| 241 | // Y | ||
| 242 | "SEQ.S.CC RC.w,RC.y,1;" | ||
| 243 | "IF NE.w;" | ||
| 244 | "MOV {}.x,{};" | ||
| 245 | "ELSE;" | ||
| 246 | // Z | ||
| 247 | "SEQ.S.CC RC.w,RC.y,2;" | ||
| 248 | "IF NE.w;" | ||
| 249 | "MOV {}.x,{};" | ||
| 250 | "ELSE;" | ||
| 251 | // W | ||
| 252 | "MOV {}.x,{};" | ||
| 253 | "ENDIF;" | ||
| 254 | "ENDIF;" | ||
| 255 | "ENDIF;" | ||
| 256 | "ELSE;", | ||
| 257 | compare_index, ret, values[0], ret, values[1], ret, values[2], ret, values[3]); | ||
| 258 | }}; | ||
| 259 | const auto read_swizzled{[&](u32 compare_index, std::string_view value) { | ||
| 260 | const std::array values{fmt::format("{}.x", value), fmt::format("{}.y", value), | ||
| 261 | fmt::format("{}.z", value), fmt::format("{}.w", value)}; | ||
| 262 | read(compare_index, values); | ||
| 263 | }}; | ||
| 264 | if (ctx.info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 265 | const u32 index{static_cast<u32>(IR::Attribute::PositionX)}; | ||
| 266 | if (IsInputArray(ctx.stage)) { | ||
| 267 | read_swizzled(index, fmt::format("vertex_position{}", VertexIndex(ctx, vertex))); | ||
| 268 | } else { | ||
| 269 | read_swizzled(index, fmt::format("{}.position", ctx.attrib_name)); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | for (u32 index = 0; index < static_cast<u32>(IR::NUM_GENERICS); ++index) { | ||
| 273 | if (!ctx.info.loads.Generic(index)) { | ||
| 274 | continue; | ||
| 275 | } | ||
| 276 | read_swizzled(index, fmt::format("in_attr{}{}[0]", index, VertexIndex(ctx, vertex))); | ||
| 277 | } | ||
| 278 | for (u32 i = 0; i < num_endifs; ++i) { | ||
| 279 | ctx.Add("ENDIF;"); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
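The shift/mask prologue above decodes a byte offset into a vec4 attribute index (RC.z) and a component within that vec4 (RC.y). The same decode in scalar C++, as a sketch (the offset value is an arbitrary example):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t offset = 0x26;                  // example byte offset
        const uint32_t base_index = offset >> 2;       // 32-bit word index (RC.x)
        const uint32_t masked_index = base_index & 3;  // component in the vec4 (RC.y)
        const uint32_t compare_index = offset >> 4;    // vec4 attribute index (RC.z)
        std::printf("attribute %u, component %c\n", compare_index, "xyzw"[masked_index]);
        // Prints: attribute 2, component y
    }

Each read() call then emits one SEQ against RC.z plus a nested component select against RC.y, which is why the emitted chain ends with one ENDIF per attribute that was compared.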
| 283 | void EmitSetAttributeIndexed([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarU32 offset, | ||
| 284 | [[maybe_unused]] ScalarF32 value, [[maybe_unused]] ScalarU32 vertex) { | ||
| 285 | throw NotImplementedException("GLASM instruction"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch) { | ||
| 289 | if (!IR::IsGeneric(patch)) { | ||
| 290 | throw NotImplementedException("Non-generic patch load"); | ||
| 291 | } | ||
| 292 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 293 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 294 | const char swizzle{"xyzw"[element]}; | ||
| 295 | const std::string_view out{ctx.stage == Stage::TessellationControl ? ".out" : ""}; | ||
| 296 | ctx.Add("MOV.F {},primitive{}.patch.attrib[{}].{};", inst, out, index, swizzle); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value) { | ||
| 300 | if (IR::IsGeneric(patch)) { | ||
| 301 | const u32 index{IR::GenericPatchIndex(patch)}; | ||
| 302 | const u32 element{IR::GenericPatchElement(patch)}; | ||
| 303 | ctx.Add("MOV.F result.patch.attrib[{}].{},{};", index, "xyzw"[element], value); | ||
| 304 | return; | ||
| 305 | } | ||
| 306 | switch (patch) { | ||
| 307 | case IR::Patch::TessellationLodLeft: | ||
| 308 | case IR::Patch::TessellationLodRight: | ||
| 309 | case IR::Patch::TessellationLodTop: | ||
| 310 | case IR::Patch::TessellationLodBottom: { | ||
| 311 | const u32 index{static_cast<u32>(patch) - static_cast<u32>(IR::Patch::TessellationLodLeft)}; | ||
| 312 | ctx.Add("MOV.F result.patch.tessouter[{}].x,{};", index, value); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | case IR::Patch::TessellationLodInteriorU: | ||
| 316 | ctx.Add("MOV.F result.patch.tessinner[0].x,{};", value); | ||
| 317 | break; | ||
| 318 | case IR::Patch::TessellationLodInteriorV: | ||
| 319 | ctx.Add("MOV.F result.patch.tessinner[1].x,{};", value); | ||
| 320 | break; | ||
| 321 | default: | ||
| 322 | throw NotImplementedException("Patch {}", patch); | ||
| 323 | } | ||
| 324 | } | ||
| 325 | |||
| 326 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value) { | ||
| 327 | ctx.Add("MOV.F frag_color{}.{},{};", index, "xyzw"[component], value); | ||
| 328 | } | ||
| 329 | |||
| 330 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value) { | ||
| 331 | ctx.Add("MOV.S result.samplemask.x,{};", value); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value) { | ||
| 335 | ctx.Add("MOV.F result.depth.z,{};", value); | ||
| 336 | } | ||
| 337 | |||
| 338 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset) { | ||
| 339 | ctx.Add("MOV.U {},lmem[{}].x;", inst, word_offset); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value) { | ||
| 343 | ctx.Add("MOV.U lmem[{}].x,{};", word_offset, value); | ||
| 344 | } | ||
| 345 | |||
| 346 | } // namespace Shader::Backend::GLASM | ||
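EmitLoadLocal and EmitWriteLocal address lmem by 32-bit word offset, so local memory behaves like a flat array of words. A sketch of the equivalent semantics follows; the array size here is a hypothetical placeholder, since the real size comes from the program's local-memory declaration:

    #include <array>
    #include <cstdint>

    std::array<uint32_t, 1024> lmem{};  // hypothetical size

    uint32_t LoadLocal(uint32_t word_offset) {
        return lmem[word_offset];       // MOV.U dest,lmem[word_offset].x;
    }

    void WriteLocal(uint32_t word_offset, uint32_t value) {
        lmem[word_offset] = value;      // MOV.U lmem[word_offset].x,value;
    }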
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_control_flow.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp new file mode 100644 index 000000000..ccdf1cbc8 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_convert.cpp | |||
| @@ -0,0 +1,231 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | std::string_view FpRounding(IR::FpRounding fp_rounding) { | ||
| 15 | switch (fp_rounding) { | ||
| 16 | case IR::FpRounding::DontCare: | ||
| 17 | return ""; | ||
| 18 | case IR::FpRounding::RN: | ||
| 19 | return ".ROUND"; | ||
| 20 | case IR::FpRounding::RZ: | ||
| 21 | return ".TRUNC"; | ||
| 22 | case IR::FpRounding::RM: | ||
| 23 | return ".FLR"; | ||
| 24 | case IR::FpRounding::RP: | ||
| 25 | return ".CEIL"; | ||
| 26 | } | ||
| 27 | throw InvalidArgument("Invalid floating-point rounding {}", fp_rounding); | ||
| 28 | } | ||
| 29 | |||
| 30 | template <typename InputType> | ||
| 31 | void Convert(EmitContext& ctx, IR::Inst& inst, InputType value, std::string_view dest, | ||
| 32 | std::string_view src, bool is_long_result) { | ||
| 33 | const std::string_view fp_rounding{FpRounding(inst.Flags<IR::FpControl>().rounding)}; | ||
| 34 | const auto ret{is_long_result ? ctx.reg_alloc.LongDefine(inst) : ctx.reg_alloc.Define(inst)}; | ||
| 35 | ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value); | ||
| 36 | } | ||
| 37 | } // Anonymous namespace | ||
| 38 | |||
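All of the emitters in this file funnel into Convert, which composes one CVT instruction from a destination type, a source type, and an optional rounding suffix taken from the instruction's FP control flags. A standalone sketch of the mnemonic composition, assuming only fmt (register names R0 and R1 are hypothetical):

    #include <fmt/format.h>
    #include <string_view>

    int main() {
        const std::string_view dest = "S32";
        const std::string_view src = "F32";
        const std::string_view fp_rounding = ".TRUNC";  // IR::FpRounding::RZ
        // Mirrors ctx.Add("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, ret, value):
        fmt::print("{}\n",
                   fmt::format("CVT.{}.{}{} {}.x,{};", dest, src, fp_rounding, "R0", "R1.x"));
        // Prints: CVT.S32.F32.TRUNC R0.x,R1.x;
    }

The is_long_result flag only selects whether the destination comes from Define or LongDefine, i.e. whether the result needs a 64-bit register.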
| 39 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 40 | Convert(ctx, inst, value, "S16", "F16", false); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 44 | Convert(ctx, inst, value, "S16", "F32", false); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 48 | Convert(ctx, inst, value, "S16", "F64", false); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 52 | Convert(ctx, inst, value, "S32", "F16", false); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 56 | Convert(ctx, inst, value, "S32", "F32", false); | ||
| 57 | } | ||
| 58 | |||
| 59 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 60 | Convert(ctx, inst, value, "S32", "F64", false); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 64 | Convert(ctx, inst, value, "S64", "F16", true); | ||
| 65 | } | ||
| 66 | |||
| 67 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 68 | Convert(ctx, inst, value, "S64", "F32", true); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 72 | Convert(ctx, inst, value, "S64", "F64", true); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 76 | Convert(ctx, inst, value, "U16", "F16", false); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 80 | Convert(ctx, inst, value, "U16", "F32", false); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 84 | Convert(ctx, inst, value, "U16", "F64", false); | ||
| 85 | } | ||
| 86 | |||
| 87 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 88 | Convert(ctx, inst, value, "U32", "F16", false); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 92 | Convert(ctx, inst, value, "U32", "F32", false); | ||
| 93 | } | ||
| 94 | |||
| 95 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 96 | Convert(ctx, inst, value, "U32", "F64", false); | ||
| 97 | } | ||
| 98 | |||
| 99 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 100 | Convert(ctx, inst, value, "U64", "F16", true); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 104 | Convert(ctx, inst, value, "U64", "F32", true); | ||
| 105 | } | ||
| 106 | |||
| 107 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 108 | Convert(ctx, inst, value, "U64", "F64", true); | ||
| 109 | } | ||
| 110 | |||
| 111 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 112 | Convert(ctx, inst, value, "U64", "U32", true); | ||
| 113 | } | ||
| 114 | |||
| 115 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 116 | Convert(ctx, inst, value, "U32", "U64", false); | ||
| 117 | } | ||
| 118 | |||
| 119 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 120 | Convert(ctx, inst, value, "F16", "F32", false); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 124 | Convert(ctx, inst, value, "F32", "F16", false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 128 | Convert(ctx, inst, value, "F32", "F64", false); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 132 | Convert(ctx, inst, value, "F64", "F32", true); | ||
| 133 | } | ||
| 134 | |||
| 135 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 136 | Convert(ctx, inst, value, "F16", "S8", false); | ||
| 137 | } | ||
| 138 | |||
| 139 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 140 | Convert(ctx, inst, value, "F16", "S16", false); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 144 | Convert(ctx, inst, value, "F16", "S32", false); | ||
| 145 | } | ||
| 146 | |||
| 147 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 148 | Convert(ctx, inst, value, "F16", "S64", false); | ||
| 149 | } | ||
| 150 | |||
| 151 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 152 | Convert(ctx, inst, value, "F16", "U8", false); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 156 | Convert(ctx, inst, value, "F16", "U16", false); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 160 | Convert(ctx, inst, value, "F16", "U32", false); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 164 | Convert(ctx, inst, value, "F16", "U64", false); | ||
| 165 | } | ||
| 166 | |||
| 167 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 168 | Convert(ctx, inst, value, "F32", "S8", false); | ||
| 169 | } | ||
| 170 | |||
| 171 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 172 | Convert(ctx, inst, value, "F32", "S16", false); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 176 | Convert(ctx, inst, value, "F32", "S32", false); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 180 | Convert(ctx, inst, value, "F32", "S64", false); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 184 | Convert(ctx, inst, value, "F32", "U8", false); | ||
| 185 | } | ||
| 186 | |||
| 187 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 188 | Convert(ctx, inst, value, "F32", "U16", false); | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 192 | Convert(ctx, inst, value, "F32", "U32", false); | ||
| 193 | } | ||
| 194 | |||
| 195 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 196 | Convert(ctx, inst, value, "F32", "U64", false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 200 | Convert(ctx, inst, value, "F64", "S8", true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 204 | Convert(ctx, inst, value, "F64", "S16", true); | ||
| 205 | } | ||
| 206 | |||
| 207 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 208 | Convert(ctx, inst, value, "F64", "S32", true); | ||
| 209 | } | ||
| 210 | |||
| 211 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 212 | Convert(ctx, inst, value, "F64", "S64", true); | ||
| 213 | } | ||
| 214 | |||
| 215 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 216 | Convert(ctx, inst, value, "F64", "U8", true); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 220 | Convert(ctx, inst, value, "F64", "U16", true); | ||
| 221 | } | ||
| 222 | |||
| 223 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 224 | Convert(ctx, inst, value, "F64", "U32", true); | ||
| 225 | } | ||
| 226 | |||
| 227 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 228 | Convert(ctx, inst, value, "F64", "U64", true); | ||
| 229 | } | ||
| 230 | |||
| 231 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp new file mode 100644 index 000000000..4ed58619d --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_floating_point.cpp | |||
| @@ -0,0 +1,414 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | template <typename InputType> | ||
| 15 | void Compare(EmitContext& ctx, IR::Inst& inst, InputType lhs, InputType rhs, std::string_view op, | ||
| 16 | std::string_view type, bool ordered, bool inequality = false) { | ||
| 17 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 18 | ctx.Add("{}.{} RC.x,{},{};", op, type, lhs, rhs); | ||
| 19 | if (ordered && inequality) { | ||
| 20 | ctx.Add("SEQ.{} RC.y,{},{};" | ||
| 21 | "SEQ.{} RC.z,{},{};" | ||
| 22 | "AND.U RC.x,RC.x,RC.y;" | ||
| 23 | "AND.U RC.x,RC.x,RC.z;" | ||
| 24 | "SNE.S {}.x,RC.x,0;", | ||
| 25 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 26 | } else if (ordered) { | ||
| 27 | ctx.Add("SNE.S {}.x,RC.x,0;", ret); | ||
| 28 | } else { | ||
| 29 | ctx.Add("SNE.{} RC.y,{},{};" | ||
| 30 | "SNE.{} RC.z,{},{};" | ||
| 31 | "OR.U RC.x,RC.x,RC.y;" | ||
| 32 | "OR.U RC.x,RC.x,RC.z;" | ||
| 33 | "SNE.S {}.x,RC.x,0;", | ||
| 34 | type, lhs, lhs, type, rhs, rhs, ret); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | template <typename InputType> | ||
| 39 | void Clamp(EmitContext& ctx, Register ret, InputType value, InputType min_value, | ||
| 40 | InputType max_value, std::string_view type) { | ||
| 41 | // Call MAX first so a NaN input is clamped to min_value instead of propagating | ||
| 42 | ctx.Add("MAX.{} RC.x,{},{};" | ||
| 43 | "MIN.{} {}.x,RC.x,{};", | ||
| 44 | type, min_value, value, type, ret, max_value); | ||
| 45 | } | ||
| 46 | |||
| 47 | std::string_view Precise(IR::Inst& inst) { | ||
| 48 | const bool precise{inst.Flags<IR::FpControl>().no_contraction}; | ||
| 49 | return precise ? ".PREC" : ""; | ||
| 50 | } | ||
| 51 | } // Anonymous namespace | ||
| 52 | |||
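Compare's NaN handling follows from the code itself: the raw result of the set-on-compare is used directly for ordered comparisons, ordered inequality additionally ANDs in lhs==lhs and rhs==rhs (each false when that operand is NaN), and the unordered variants OR in lhs!=lhs and rhs!=rhs. A scalar model of those two corrected paths under IEEE-754 semantics:

    #include <cmath>
    #include <cstdio>

    // ordered && inequality: AND the raw result with "neither operand is NaN"
    bool OrdNotEqual(float lhs, float rhs) {
        return (lhs != rhs) && !std::isnan(lhs) && !std::isnan(rhs);
    }

    // unordered: OR the raw result with "either operand is NaN"
    bool UnordLessThan(float lhs, float rhs) {
        return (lhs < rhs) || std::isnan(lhs) || std::isnan(rhs);
    }

    int main() {
        const float nan = std::nanf("");
        std::printf("%d %d\n", OrdNotEqual(nan, 1.0f), UnordLessThan(nan, 1.0f));
        // Prints: 0 1
    }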
| 53 | void EmitFPAbs16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 54 | [[maybe_unused]] Register value) { | ||
| 55 | throw NotImplementedException("GLASM instruction"); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 59 | ctx.Add("MOV.F {}.x,|{}|;", inst, value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 63 | ctx.LongAdd("MOV.F64 {}.x,|{}|;", inst, value); | ||
| 64 | } | ||
| 65 | |||
| 66 | void EmitFPAdd16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 67 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 68 | throw NotImplementedException("GLASM instruction"); | ||
| 69 | } | ||
| 70 | |||
| 71 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 72 | ctx.Add("ADD.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 73 | } | ||
| 74 | |||
| 75 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 76 | ctx.Add("ADD.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 77 | } | ||
| 78 | |||
| 79 | void EmitFPFma16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 80 | [[maybe_unused]] Register a, [[maybe_unused]] Register b, | ||
| 81 | [[maybe_unused]] Register c) { | ||
| 82 | throw NotImplementedException("GLASM instruction"); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c) { | ||
| 86 | ctx.Add("MAD.F{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b, c); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c) { | ||
| 90 | ctx.Add("MAD.F64{} {}.x,{},{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b, c); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 94 | ctx.Add("MAX.F {}.x,{},{};", inst, a, b); | ||
| 95 | } | ||
| 96 | |||
| 97 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 98 | ctx.LongAdd("MAX.F64 {}.x,{},{};", inst, a, b); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 102 | ctx.Add("MIN.F {}.x,{},{};", inst, a, b); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 106 | ctx.LongAdd("MIN.F64 {}.x,{},{};", inst, a, b); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitFPMul16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 110 | [[maybe_unused]] Register a, [[maybe_unused]] Register b) { | ||
| 111 | throw NotImplementedException("GLASM instruction"); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b) { | ||
| 115 | ctx.Add("MUL.F{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.Define(inst), a, b); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b) { | ||
| 119 | ctx.Add("MUL.F64{} {}.x,{},{};", Precise(inst), ctx.reg_alloc.LongDefine(inst), a, b); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitFPNeg16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 123 | throw NotImplementedException("GLASM instruction"); | ||
| 124 | } | ||
| 125 | |||
| 126 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value) { | ||
| 127 | ctx.Add("MOV.F {}.x,-{};", inst, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 131 | ctx.LongAdd("MOV.F64 {}.x,-{};", inst, value); | ||
| 132 | } | ||
| 133 | |||
| 134 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 135 | ctx.Add("SIN {}.x,{};", inst, value); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 139 | ctx.Add("COS {}.x,{};", inst, value); | ||
| 140 | } | ||
| 141 | |||
| 142 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 143 | ctx.Add("EX2 {}.x,{};", inst, value); | ||
| 144 | } | ||
| 145 | |||
| 146 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 147 | ctx.Add("LG2 {}.x,{};", inst, value); | ||
| 148 | } | ||
| 149 | |||
| 150 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 151 | ctx.Add("RCP {}.x,{};", inst, value); | ||
| 152 | } | ||
| 153 | |||
| 154 | void EmitFPRecip64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 155 | throw NotImplementedException("GLASM instruction"); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 159 | ctx.Add("RSQ {}.x,{};", inst, value); | ||
| 160 | } | ||
| 161 | |||
| 162 | void EmitFPRecipSqrt64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 163 | throw NotImplementedException("GLASM instruction"); | ||
| 164 | } | ||
| 165 | |||
| 166 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 167 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 168 | ctx.Add("RSQ RC.x,{};RCP {}.x,RC.x;", value, ret); | ||
| 169 | } | ||
| 170 | |||
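EmitFPSqrt composes the square root from the two approximation instructions available here, using sqrt(x) == 1/(1/sqrt(x)); precision therefore follows RSQ and RCP rather than a dedicated square-root unit. The identity as a scalar sketch:

    #include <cmath>
    #include <cstdio>

    int main() {
        const float x = 9.0f;
        const float rsq = 1.0f / std::sqrt(x);  // RSQ RC.x,x;
        const float sqrt_x = 1.0f / rsq;        // RCP ret.x,RC.x;
        std::printf("%f\n", sqrt_x);            // 3.000000
    }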
| 171 | void EmitFPSaturate16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 172 | throw NotImplementedException("GLASM instruction"); | ||
| 173 | } | ||
| 174 | |||
| 175 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 176 | ctx.Add("MOV.F.SAT {}.x,{};", inst, value); | ||
| 177 | } | ||
| 178 | |||
| 179 | void EmitFPSaturate64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 180 | throw NotImplementedException("GLASM instruction"); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitFPClamp16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value, | ||
| 184 | [[maybe_unused]] Register min_value, [[maybe_unused]] Register max_value) { | ||
| 185 | throw NotImplementedException("GLASM instruction"); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 189 | ScalarF32 max_value) { | ||
| 190 | Clamp(ctx, ctx.reg_alloc.Define(inst), value, min_value, max_value, "F"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 194 | ScalarF64 max_value) { | ||
| 195 | Clamp(ctx, ctx.reg_alloc.LongDefine(inst), value, min_value, max_value, "F64"); | ||
| 196 | } | ||
| 197 | |||
| 198 | void EmitFPRoundEven16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 199 | throw NotImplementedException("GLASM instruction"); | ||
| 200 | } | ||
| 201 | |||
| 202 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 203 | ctx.Add("ROUND.F {}.x,{};", inst, value); | ||
| 204 | } | ||
| 205 | |||
| 206 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 207 | ctx.LongAdd("ROUND.F64 {}.x,{};", inst, value); | ||
| 208 | } | ||
| 209 | |||
| 210 | void EmitFPFloor16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 215 | ctx.Add("FLR.F {}.x,{};", inst, value); | ||
| 216 | } | ||
| 217 | |||
| 218 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 219 | ctx.LongAdd("FLR.F64 {}.x,{};", inst, value); | ||
| 220 | } | ||
| 221 | |||
| 222 | void EmitFPCeil16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 223 | throw NotImplementedException("GLASM instruction"); | ||
| 224 | } | ||
| 225 | |||
| 226 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 227 | ctx.Add("CEIL.F {}.x,{};", inst, value); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 231 | ctx.LongAdd("CEIL.F64 {}.x,{};", inst, value); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitFPTrunc16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 235 | throw NotImplementedException("GLASM instruction"); | ||
| 236 | } | ||
| 237 | |||
| 238 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 239 | ctx.Add("TRUNC.F {}.x,{};", inst, value); | ||
| 240 | } | ||
| 241 | |||
| 242 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 243 | ctx.LongAdd("TRUNC.F64 {}.x,{};", inst, value); | ||
| 244 | } | ||
| 245 | |||
| 246 | void EmitFPOrdEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 247 | [[maybe_unused]] Register rhs) { | ||
| 248 | throw NotImplementedException("GLASM instruction"); | ||
| 249 | } | ||
| 250 | |||
| 251 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 252 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", true); | ||
| 253 | } | ||
| 254 | |||
| 255 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 256 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", true); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitFPUnordEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 260 | [[maybe_unused]] Register rhs) { | ||
| 261 | throw NotImplementedException("GLASM instruction"); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 265 | Compare(ctx, inst, lhs, rhs, "SEQ", "F", false); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 269 | Compare(ctx, inst, lhs, rhs, "SEQ", "F64", false); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitFPOrdNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 273 | [[maybe_unused]] Register rhs) { | ||
| 274 | throw NotImplementedException("GLASM instruction"); | ||
| 275 | } | ||
| 276 | |||
| 277 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 278 | Compare(ctx, inst, lhs, rhs, "SNE", "F", true, true); | ||
| 279 | } | ||
| 280 | |||
| 281 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 282 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", true, true); | ||
| 283 | } | ||
| 284 | |||
| 285 | void EmitFPUnordNotEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 286 | [[maybe_unused]] Register rhs) { | ||
| 287 | throw NotImplementedException("GLASM instruction"); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 291 | Compare(ctx, inst, lhs, rhs, "SNE", "F", false, true); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 295 | Compare(ctx, inst, lhs, rhs, "SNE", "F64", false, true); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitFPOrdLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 299 | [[maybe_unused]] Register rhs) { | ||
| 300 | throw NotImplementedException("GLASM instruction"); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 304 | Compare(ctx, inst, lhs, rhs, "SLT", "F", true); | ||
| 305 | } | ||
| 306 | |||
| 307 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 308 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", true); | ||
| 309 | } | ||
| 310 | |||
| 311 | void EmitFPUnordLessThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 312 | [[maybe_unused]] Register rhs) { | ||
| 313 | throw NotImplementedException("GLASM instruction"); | ||
| 314 | } | ||
| 315 | |||
| 316 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 317 | Compare(ctx, inst, lhs, rhs, "SLT", "F", false); | ||
| 318 | } | ||
| 319 | |||
| 320 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 321 | Compare(ctx, inst, lhs, rhs, "SLT", "F64", false); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitFPOrdGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 325 | [[maybe_unused]] Register rhs) { | ||
| 326 | throw NotImplementedException("GLASM instruction"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 330 | Compare(ctx, inst, lhs, rhs, "SGT", "F", true); | ||
| 331 | } | ||
| 332 | |||
| 333 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 334 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", true); | ||
| 335 | } | ||
| 336 | |||
| 337 | void EmitFPUnordGreaterThan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 338 | [[maybe_unused]] Register rhs) { | ||
| 339 | throw NotImplementedException("GLASM instruction"); | ||
| 340 | } | ||
| 341 | |||
| 342 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 343 | Compare(ctx, inst, lhs, rhs, "SGT", "F", false); | ||
| 344 | } | ||
| 345 | |||
| 346 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 347 | Compare(ctx, inst, lhs, rhs, "SGT", "F64", false); | ||
| 348 | } | ||
| 349 | |||
| 350 | void EmitFPOrdLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 351 | [[maybe_unused]] Register rhs) { | ||
| 352 | throw NotImplementedException("GLASM instruction"); | ||
| 353 | } | ||
| 354 | |||
| 355 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 356 | Compare(ctx, inst, lhs, rhs, "SLE", "F", true); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 360 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", true); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitFPUnordLessThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 364 | [[maybe_unused]] Register rhs) { | ||
| 365 | throw NotImplementedException("GLASM instruction"); | ||
| 366 | } | ||
| 367 | |||
| 368 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 369 | Compare(ctx, inst, lhs, rhs, "SLE", "F", false); | ||
| 370 | } | ||
| 371 | |||
| 372 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 373 | Compare(ctx, inst, lhs, rhs, "SLE", "F64", false); | ||
| 374 | } | ||
| 375 | |||
| 376 | void EmitFPOrdGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 377 | [[maybe_unused]] Register rhs) { | ||
| 378 | throw NotImplementedException("GLASM instruction"); | ||
| 379 | } | ||
| 380 | |||
| 381 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 382 | Compare(ctx, inst, lhs, rhs, "SGE", "F", true); | ||
| 383 | } | ||
| 384 | |||
| 385 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 386 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", true); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitFPUnordGreaterThanEqual16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register lhs, | ||
| 390 | [[maybe_unused]] Register rhs) { | ||
| 391 | throw NotImplementedException("GLASM instruction"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs) { | ||
| 395 | Compare(ctx, inst, lhs, rhs, "SGE", "F", false); | ||
| 396 | } | ||
| 397 | |||
| 398 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs) { | ||
| 399 | Compare(ctx, inst, lhs, rhs, "SGE", "F64", false); | ||
| 400 | } | ||
| 401 | |||
| 402 | void EmitFPIsNan16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Register value) { | ||
| 403 | throw NotImplementedException("GLASM instruction"); | ||
| 404 | } | ||
| 405 | |||
| 406 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value) { | ||
| 407 | Compare(ctx, inst, value, value, "SNE", "F", true, false); | ||
| 408 | } | ||
| 409 | |||
| 410 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value) { | ||
| 411 | Compare(ctx, inst, value, value, "SNE", "F64", true, false); | ||
| 412 | } | ||
| 413 | |||
| 414 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp new file mode 100644 index 000000000..09e3a9b82 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -0,0 +1,850 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLASM { | ||
| 13 | namespace { | ||
| 14 | struct ScopedRegister { | ||
| 15 | ScopedRegister() = default; | ||
| 16 | ScopedRegister(RegAlloc& reg_alloc_) : reg_alloc{®_alloc_}, reg{reg_alloc->AllocReg()} {} | ||
| 17 | |||
| 18 | ~ScopedRegister() { | ||
| 19 | if (reg_alloc) { | ||
| 20 | reg_alloc->FreeReg(reg); | ||
| 21 | } | ||
| 22 | } | ||
| 23 | |||
| 24 | ScopedRegister& operator=(ScopedRegister&& rhs) noexcept { | ||
| 25 | if (reg_alloc) { | ||
| 26 | reg_alloc->FreeReg(reg); | ||
| 27 | } | ||
| 28 | reg_alloc = std::exchange(rhs.reg_alloc, nullptr); | ||
| 29 | reg = rhs.reg; | ||
| 30 | return *this; | ||
| 31 | } | ||
| 32 | |||
| 33 | ScopedRegister(ScopedRegister&& rhs) noexcept | ||
| 34 | : reg_alloc{std::exchange(rhs.reg_alloc, nullptr)}, reg{rhs.reg} {} | ||
| 35 | |||
| 36 | ScopedRegister& operator=(const ScopedRegister&) = delete; | ||
| 37 | ScopedRegister(const ScopedRegister&) = delete; | ||
| 38 | |||
| 39 | RegAlloc* reg_alloc{}; | ||
| 40 | Register reg; | ||
| 41 | }; | ||
| 42 | |||
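ScopedRegister is a small RAII wrapper: it allocates from RegAlloc on construction, frees on destruction, and supports move assignment so a default-constructed (empty) instance can receive a register later, as the sampling emitters below do with their staging registers. A minimal usage sketch against a hypothetical stand-in allocator (the real RegAlloc lives elsewhere in this backend):

    #include <cstdio>

    struct MockRegAlloc {  // hypothetical stand-in for RegAlloc
        int next{};
        int AllocReg() { std::printf("alloc R%d\n", next); return next++; }
        void FreeReg(int reg) { std::printf("free R%d\n", reg); }
    };

    struct ScopedReg {
        ScopedReg() = default;
        explicit ScopedReg(MockRegAlloc& ra_) : ra{&ra_}, reg{ra->AllocReg()} {}
        ~ScopedReg() {
            if (ra) {
                ra->FreeReg(reg);
            }
        }
        MockRegAlloc* ra{};
        int reg{};
    };

    int main() {
        MockRegAlloc ra;
        {
            ScopedReg staging{ra};  // alloc R0
        }                           // free R0 at scope exit
    }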
| 43 | std::string Texture(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 44 | [[maybe_unused]] const IR::Value& index) { | ||
| 45 | // FIXME: indexed reads | ||
| 46 | if (info.type == TextureType::Buffer) { | ||
| 47 | return fmt::format("texture[{}]", ctx.texture_buffer_bindings.at(info.descriptor_index)); | ||
| 48 | } else { | ||
| 49 | return fmt::format("texture[{}]", ctx.texture_bindings.at(info.descriptor_index)); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | std::string Image(EmitContext& ctx, IR::TextureInstInfo info, | ||
| 54 | [[maybe_unused]] const IR::Value& index) { | ||
| 55 | // FIXME: indexed reads | ||
| 56 | if (info.type == TextureType::Buffer) { | ||
| 57 | return fmt::format("image[{}]", ctx.image_buffer_bindings.at(info.descriptor_index)); | ||
| 58 | } else { | ||
| 59 | return fmt::format("image[{}]", ctx.image_bindings.at(info.descriptor_index)); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view TextureType(IR::TextureInstInfo info) { | ||
| 64 | if (info.is_depth) { | ||
| 65 | switch (info.type) { | ||
| 66 | case TextureType::Color1D: | ||
| 67 | return "SHADOW1D"; | ||
| 68 | case TextureType::ColorArray1D: | ||
| 69 | return "SHADOWARRAY1D"; | ||
| 70 | case TextureType::Color2D: | ||
| 71 | return "SHADOW2D"; | ||
| 72 | case TextureType::ColorArray2D: | ||
| 73 | return "SHADOWARRAY2D"; | ||
| 74 | case TextureType::Color3D: | ||
| 75 | return "SHADOW3D"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "SHADOWCUBE"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "SHADOWARRAYCUBE"; | ||
| 80 | case TextureType::Buffer: | ||
| 81 | return "SHADOWBUFFER"; | ||
| 82 | } | ||
| 83 | } else { | ||
| 84 | switch (info.type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "ARRAY1D"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "ARRAY2D"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "CUBE"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "ARRAYCUBE"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "BUFFER"; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | throw InvalidArgument("Invalid texture type {}", info.type.Value()); | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string Offset(EmitContext& ctx, const IR::Value& offset) { | ||
| 107 | if (offset.IsEmpty()) { | ||
| 108 | return ""; | ||
| 109 | } | ||
| 110 | return fmt::format(",offset({})", Register{ctx.reg_alloc.Consume(offset)}); | ||
| 111 | } | ||
| 112 | |||
| 113 | std::pair<ScopedRegister, ScopedRegister> AllocOffsetsRegs(EmitContext& ctx, | ||
| 114 | const IR::Value& offset2) { | ||
| 115 | if (offset2.IsEmpty()) { | ||
| 116 | return {}; | ||
| 117 | } else { | ||
| 118 | return {ctx.reg_alloc, ctx.reg_alloc}; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | void SwizzleOffsets(EmitContext& ctx, Register off_x, Register off_y, const IR::Value& offset1, | ||
| 123 | const IR::Value& offset2) { | ||
| 124 | const Register offsets_a{ctx.reg_alloc.Consume(offset1)}; | ||
| 125 | const Register offsets_b{ctx.reg_alloc.Consume(offset2)}; | ||
| 126 | // Input swizzle: [XYXY] [XYXY] | ||
| 127 | // Output swizzle: [XXXX] [YYYY] | ||
| 128 | ctx.Add("MOV {}.x,{}.x;" | ||
| 129 | "MOV {}.y,{}.z;" | ||
| 130 | "MOV {}.z,{}.x;" | ||
| 131 | "MOV {}.w,{}.z;" | ||
| 132 | "MOV {}.x,{}.y;" | ||
| 133 | "MOV {}.y,{}.w;" | ||
| 134 | "MOV {}.z,{}.y;" | ||
| 135 | "MOV {}.w,{}.w;", | ||
| 136 | off_x, offsets_a, off_x, offsets_a, off_x, offsets_b, off_x, offsets_b, off_y, | ||
| 137 | offsets_a, off_y, offsets_a, off_y, offsets_b, off_y, offsets_b); | ||
| 138 | } | ||
| 139 | |||
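The eight MOVs above repack two registers of interleaved (x,y) offset pairs into one register holding four x components and one holding four y components, the layout TXGO consumes. The same shuffle with concrete example values:

    #include <array>
    #include <cstdio>

    int main() {
        const std::array<int, 4> offsets_a{1, 2, 3, 4};  // [XYXY]: x0 y0 x1 y1 (example values)
        const std::array<int, 4> offsets_b{5, 6, 7, 8};  // [XYXY]: x2 y2 x3 y3
        const std::array<int, 4> off_x{offsets_a[0], offsets_a[2], offsets_b[0], offsets_b[2]};
        const std::array<int, 4> off_y{offsets_a[1], offsets_a[3], offsets_b[1], offsets_b[3]};
        std::printf("x: %d %d %d %d  y: %d %d %d %d\n", off_x[0], off_x[1], off_x[2], off_x[3],
                    off_y[0], off_y[1], off_y[2], off_y[3]);
        // Prints: x: 1 3 5 7  y: 2 4 6 8
    }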
| 140 | std::string GradOffset(const IR::Value& offset) { | ||
| 141 | if (offset.IsImmediate()) { | ||
| 142 | LOG_WARNING(Shader_GLASM, "Gradient offset is a scalar immediate"); | ||
| 143 | return ""; | ||
| 144 | } | ||
| 145 | IR::Inst* const vector{offset.InstRecursive()}; | ||
| 146 | if (!vector->AreAllArgsImmediates()) { | ||
| 147 | LOG_WARNING(Shader_GLASM, "Gradient offset vector is not immediate"); | ||
| 148 | return ""; | ||
| 149 | } | ||
| 150 | switch (vector->NumArgs()) { | ||
| 151 | case 1: | ||
| 152 | return fmt::format(",({})", static_cast<s32>(vector->Arg(0).U32())); | ||
| 153 | case 2: | ||
| 154 | return fmt::format(",({},{})", static_cast<s32>(vector->Arg(0).U32()), | ||
| 155 | static_cast<s32>(vector->Arg(1).U32())); | ||
| 156 | default: | ||
| 157 | throw LogicError("Invalid number of gradient offsets {}", vector->NumArgs()); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | std::pair<std::string, ScopedRegister> Coord(EmitContext& ctx, const IR::Value& coord) { | ||
| 162 | if (coord.IsImmediate()) { | ||
| 163 | ScopedRegister scoped_reg(ctx.reg_alloc); | ||
| 164 | ctx.Add("MOV.U {}.x,{};", scoped_reg.reg, ScalarU32{ctx.reg_alloc.Consume(coord)}); | ||
| 165 | return {fmt::to_string(scoped_reg.reg), std::move(scoped_reg)}; | ||
| 166 | } | ||
| 167 | std::string coord_vec{fmt::to_string(Register{ctx.reg_alloc.Consume(coord)})}; | ||
| 168 | if (coord.InstRecursive()->HasUses()) { | ||
| 169 | // Move coords that still have uses into a scratch register; this should never happen | ||
| 170 | // in practice, because vectors are only assembled for immediate texture instructions | ||
| 171 | ctx.Add("MOV.F RC,{};", coord_vec); | ||
| 172 | coord_vec = "RC"; | ||
| 173 | } | ||
| 174 | return {std::move(coord_vec), ScopedRegister{}}; | ||
| 175 | } | ||
| 176 | |||
| 177 | void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { | ||
| 178 | if (!sparse_inst) { | ||
| 179 | return; | ||
| 180 | } | ||
| 181 | const Register sparse_ret{ctx.reg_alloc.Define(*sparse_inst)}; | ||
| 182 | ctx.Add("MOV.S {},-1;" | ||
| 183 | "MOV.S {}(NONRESIDENT),0;", | ||
| 184 | sparse_ret, sparse_ret); | ||
| 185 | } | ||
| 186 | |||
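StoreSparse materializes the residency query result: the destination defaults to -1 (true), and the second, (NONRESIDENT)-predicated MOV overwrites it with 0 when the preceding .SPARSE fetch missed. Read as scalar logic (a sketch; the predicate mechanics belong to the sparse-texture extension):

    #include <cstdio>

    int SparseResult(bool nonresident) {
        int ret = -1;      // MOV.S ret,-1;
        if (nonresident) {
            ret = 0;       // MOV.S ret(NONRESIDENT),0;
        }
        return ret;
    }

    int main() {
        std::printf("%d %d\n", SparseResult(false), SparseResult(true));  // -1 0
    }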
| 187 | std::string_view FormatStorage(ImageFormat format) { | ||
| 188 | switch (format) { | ||
| 189 | case ImageFormat::Typeless: | ||
| 190 | return "U"; | ||
| 191 | case ImageFormat::R8_UINT: | ||
| 192 | return "U8"; | ||
| 193 | case ImageFormat::R8_SINT: | ||
| 194 | return "S8"; | ||
| 195 | case ImageFormat::R16_UINT: | ||
| 196 | return "U16"; | ||
| 197 | case ImageFormat::R16_SINT: | ||
| 198 | return "S16"; | ||
| 199 | case ImageFormat::R32_UINT: | ||
| 200 | return "U32"; | ||
| 201 | case ImageFormat::R32G32_UINT: | ||
| 202 | return "U32X2"; | ||
| 203 | case ImageFormat::R32G32B32A32_UINT: | ||
| 204 | return "U32X4"; | ||
| 205 | } | ||
| 206 | throw InvalidArgument("Invalid image format {}", format); | ||
| 207 | } | ||
| 208 | |||
| 209 | template <typename T> | ||
| 210 | void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, T value, | ||
| 211 | std::string_view op) { | ||
| 212 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 213 | const std::string_view type{TextureType(info)}; | ||
| 214 | const std::string image{Image(ctx, info, index)}; | ||
| 215 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 216 | ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); | ||
| 217 | } | ||
| 218 | |||
| 219 | IR::Inst* PrepareSparse(IR::Inst& inst) { | ||
| 220 | const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; | ||
| 221 | if (sparse_inst) { | ||
| 222 | sparse_inst->Invalidate(); | ||
| 223 | } | ||
| 224 | return sparse_inst; | ||
| 225 | } | ||
| 226 | } // Anonymous namespace | ||
| 227 | |||
| 228 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 229 | const IR::Value& coord, Register bias_lc, const IR::Value& offset) { | ||
| 230 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 231 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 232 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 233 | const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; | ||
| 234 | const std::string_view type{TextureType(info)}; | ||
| 235 | const std::string texture{Texture(ctx, info, index)}; | ||
| 236 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 237 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 238 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 239 | if (info.has_bias) { | ||
| 240 | if (info.type == TextureType::ColorArrayCube) { | ||
| 241 | ctx.Add("TXB.F{}{} {},{},{},{},ARRAYCUBE{};", lod_clamp_mod, sparse_mod, ret, coord_vec, | ||
| 242 | bias_lc, texture, offset_vec); | ||
| 243 | } else { | ||
| 244 | if (info.has_lod_clamp) { | ||
| 245 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 246 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 247 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, bias_lc, texture, type, | ||
| 248 | offset_vec); | ||
| 249 | } else { | ||
| 250 | ctx.Add("MOV.F {}.w,{}.x;" | ||
| 251 | "TXB.F{} {},{},{},{}{};", | ||
| 252 | coord_vec, bias_lc, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } else { | ||
| 256 | if (info.has_lod_clamp && info.type == TextureType::ColorArrayCube) { | ||
| 257 | ctx.Add("TEX.F.LODCLAMP{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, | ||
| 258 | bias_lc, texture, offset_vec); | ||
| 259 | } else { | ||
| 260 | ctx.Add("TEX.F{}{} {},{},{},{}{};", lod_clamp_mod, sparse_mod, ret, coord_vec, texture, | ||
| 261 | type, offset_vec); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | StoreSparse(ctx, sparse_inst); | ||
| 265 | } | ||
| 266 | |||
| 267 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 268 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { | ||
| 269 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 270 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 271 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 272 | const std::string_view type{TextureType(info)}; | ||
| 273 | const std::string texture{Texture(ctx, info, index)}; | ||
| 274 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 275 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 276 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 277 | if (info.type == TextureType::ColorArrayCube) { | ||
| 278 | ctx.Add("TXL.F{} {},{},{},{},ARRAYCUBE{};", sparse_mod, ret, coord_vec, lod, texture, | ||
| 279 | offset_vec); | ||
| 280 | } else { | ||
| 281 | ctx.Add("MOV.F {}.w,{};" | ||
| 282 | "TXL.F{} {},{},{},{}{};", | ||
| 283 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 284 | } | ||
| 285 | StoreSparse(ctx, sparse_inst); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 289 | const IR::Value& coord, const IR::Value& dref, | ||
| 290 | const IR::Value& bias_lc, const IR::Value& offset) { | ||
| 291 | // Allocate the staging register early so it does not alias registers consumed below | ||
| 292 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 293 | ScopedRegister staging; | ||
| 294 | if (info.type == TextureType::ColorArrayCube) { | ||
| 295 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 296 | } | ||
| 297 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 298 | const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; | ||
| 299 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 300 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 301 | const std::string_view type{TextureType(info)}; | ||
| 302 | const std::string texture{Texture(ctx, info, index)}; | ||
| 303 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 304 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 305 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 306 | if (info.has_bias) { | ||
| 307 | if (info.has_lod_clamp) { | ||
| 308 | switch (info.type) { | ||
| 309 | case TextureType::Color1D: | ||
| 310 | case TextureType::ColorArray1D: | ||
| 311 | case TextureType::Color2D: | ||
| 312 | ctx.Add("MOV.F {}.z,{};" | ||
| 313 | "MOV.F {}.w,{}.x;" | ||
| 314 | "TXB.F.LODCLAMP{} {},{},{}.y,{},{}{};", | ||
| 315 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 316 | bias_lc_vec, texture, type, offset_vec); | ||
| 317 | break; | ||
| 318 | case TextureType::ColorArray2D: | ||
| 319 | case TextureType::ColorCube: | ||
| 320 | ctx.Add("MOV.F {}.w,{};" | ||
| 321 | "TXB.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 322 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 323 | offset_vec); | ||
| 324 | break; | ||
| 325 | default: | ||
| 326 | throw NotImplementedException("Invalid type {} with bias and lod clamp", | ||
| 327 | info.type.Value()); | ||
| 328 | } | ||
| 329 | } else { | ||
| 330 | switch (info.type) { | ||
| 331 | case TextureType::Color1D: | ||
| 332 | case TextureType::ColorArray1D: | ||
| 333 | case TextureType::Color2D: | ||
| 334 | ctx.Add("MOV.F {}.z,{};" | ||
| 335 | "MOV.F {}.w,{}.x;" | ||
| 336 | "TXB.F{} {},{},{},{}{};", | ||
| 337 | coord_vec, dref_val, coord_vec, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 338 | texture, type, offset_vec); | ||
| 339 | break; | ||
| 340 | case TextureType::ColorArray2D: | ||
| 341 | case TextureType::ColorCube: | ||
| 342 | ctx.Add("MOV.F {}.w,{};" | ||
| 343 | "TXB.F{} {},{},{},{},{}{};", | ||
| 344 | coord_vec, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, texture, type, | ||
| 345 | offset_vec); | ||
| 346 | break; | ||
| 347 | case TextureType::ColorArrayCube: | ||
| 348 | ctx.Add("MOV.F {}.x,{};" | ||
| 349 | "MOV.F {}.y,{}.x;" | ||
| 350 | "TXB.F{} {},{},{},{},{}{};", | ||
| 351 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 352 | staging.reg, texture, type, offset_vec); | ||
| 353 | break; | ||
| 354 | default: | ||
| 355 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 356 | } | ||
| 357 | } | ||
| 358 | } else { | ||
| 359 | if (info.has_lod_clamp) { | ||
| 360 | if (info.type != TextureType::ColorArrayCube) { | ||
| 361 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 362 | info.type == TextureType::ColorCube}; | ||
| 363 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 364 | ctx.Add("MOV.F {}.{},{};" | ||
| 365 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 366 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, bias_lc_vec, | ||
| 367 | texture, type, offset_vec); | ||
| 368 | } else { | ||
| 369 | ctx.Add("MOV.F {}.x,{};" | ||
| 370 | "MOV.F {}.y,{};" | ||
| 371 | "TEX.F.LODCLAMP{} {},{},{},{},{}{};", | ||
| 372 | staging.reg, dref_val, staging.reg, bias_lc_vec, sparse_mod, ret, coord_vec, | ||
| 373 | staging.reg, texture, type, offset_vec); | ||
| 374 | } | ||
| 375 | } else { | ||
| 376 | if (info.type != TextureType::ColorArrayCube) { | ||
| 377 | const bool w_swizzle{info.type == TextureType::ColorArray2D || | ||
| 378 | info.type == TextureType::ColorCube}; | ||
| 379 | const char dref_swizzle{w_swizzle ? 'w' : 'z'}; | ||
| 380 | ctx.Add("MOV.F {}.{},{};" | ||
| 381 | "TEX.F{} {},{},{},{}{};", | ||
| 382 | coord_vec, dref_swizzle, dref_val, sparse_mod, ret, coord_vec, texture, | ||
| 383 | type, offset_vec); | ||
| 384 | } else { | ||
| 385 | ctx.Add("TEX.F{} {},{},{},{},{}{};", sparse_mod, ret, coord_vec, dref_val, texture, | ||
| 386 | type, offset_vec); | ||
| 387 | } | ||
| 388 | } | ||
| 389 | } | ||
| 390 | StoreSparse(ctx, sparse_inst); | ||
| 391 | } | ||
| 392 | |||
| 393 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 394 | const IR::Value& coord, const IR::Value& dref, | ||
| 395 | const IR::Value& lod, const IR::Value& offset) { | ||
| 396 | // Allocate the staging register early so it does not alias registers consumed below | ||
| 397 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 398 | ScopedRegister staging; | ||
| 399 | if (info.type == TextureType::ColorArrayCube) { | ||
| 400 | staging = ScopedRegister{ctx.reg_alloc}; | ||
| 401 | } | ||
| 402 | const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; | ||
| 403 | const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; | ||
| 404 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 405 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 406 | const std::string_view type{TextureType(info)}; | ||
| 407 | const std::string texture{Texture(ctx, info, index)}; | ||
| 408 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 409 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 410 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 411 | switch (info.type) { | ||
| 412 | case TextureType::Color1D: | ||
| 413 | case TextureType::ColorArray1D: | ||
| 414 | case TextureType::Color2D: | ||
| 415 | ctx.Add("MOV.F {}.z,{};" | ||
| 416 | "MOV.F {}.w,{};" | ||
| 417 | "TXL.F{} {},{},{},{}{};", | ||
| 418 | coord_vec, dref_val, coord_vec, lod_val, sparse_mod, ret, coord_vec, texture, type, | ||
| 419 | offset_vec); | ||
| 420 | break; | ||
| 421 | case TextureType::ColorArray2D: | ||
| 422 | case TextureType::ColorCube: | ||
| 423 | ctx.Add("MOV.F {}.w,{};" | ||
| 424 | "TXL.F{} {},{},{},{},{}{};", | ||
| 425 | coord_vec, dref_val, sparse_mod, ret, coord_vec, lod_val, texture, type, | ||
| 426 | offset_vec); | ||
| 427 | break; | ||
| 428 | case TextureType::ColorArrayCube: | ||
| 429 | ctx.Add("MOV.F {}.x,{};" | ||
| 430 | "MOV.F {}.y,{};" | ||
| 431 | "TXL.F{} {},{},{},{},{}{};", | ||
| 432 | staging.reg, dref_val, staging.reg, lod_val, sparse_mod, ret, coord_vec, | ||
| 433 | staging.reg, texture, type, offset_vec); | ||
| 434 | break; | ||
| 435 | default: | ||
| 436 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 437 | } | ||
| 438 | StoreSparse(ctx, sparse_inst); | ||
| 439 | } | ||
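| | |||
| | // Worked sketch for the Color2D case (register names hypothetical): the | ||
| | // reference and explicit LOD are packed into the free .z/.w components | ||
| | // before the lookup, emitting roughly: | ||
| | //   MOV.F R0.z,R1.x;             depth reference | ||
| | //   MOV.F R0.w,R2.x;             explicit LOD | ||
| | //   TXL.F R3,R0,texture[0],2D; | ||
| | // ColorArrayCube instead carries both values in a staging register passed | ||
| | // as an extra TXL operand. | ||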
| 440 | |||
| 441 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 442 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2) { | ||
| 443 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 444 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 445 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 446 | const char comp{"xyzw"[info.gather_component]}; | ||
| 447 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 448 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 449 | const std::string_view type{TextureType(info)}; | ||
| 450 | const std::string texture{Texture(ctx, info, index)}; | ||
| 451 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 452 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 453 | if (offset2.IsEmpty()) { | ||
| 454 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 455 | ctx.Add("TXG.F{} {},{},{}.{},{}{};", sparse_mod, ret, coord_vec, texture, comp, type, | ||
| 456 | offset_vec); | ||
| 457 | } else { | ||
| 458 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 459 | ctx.Add("TXGO.F{} {},{},{},{},{}.{},{};", sparse_mod, ret, coord_vec, off_x.reg, off_y.reg, | ||
| 460 | texture, comp, type); | ||
| 461 | } | ||
| 462 | StoreSparse(ctx, sparse_inst); | ||
| 463 | } | ||
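| | |||
| | // TXG gathers the component selected by the swizzle on the texture | ||
| | // operand from the four covered texels; a single constant offset can be | ||
| | // appended to the instruction itself. With two offset operands the TXGO | ||
| | // form is used instead, taking per-texel offsets packed into the two | ||
| | // registers allocated above and filled by SwizzleOffsets. | ||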
| 464 | |||
| 465 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 466 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 467 | const IR::Value& dref) { | ||
| 468 | // FIXME: This instruction is not working as expected | ||
| 469 | |||
| 470 | // Allocate offsets early so they don't overwrite any consumed register | ||
| 471 | const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; | ||
| 472 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 473 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 474 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 475 | const std::string_view type{TextureType(info)}; | ||
| 476 | const std::string texture{Texture(ctx, info, index)}; | ||
| 477 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 478 | const ScalarF32 dref_value{ctx.reg_alloc.Consume(dref)}; | ||
| 479 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 480 | std::string args; | ||
| 481 | switch (info.type) { | ||
| 482 | case TextureType::Color2D: | ||
| 483 | ctx.Add("MOV.F {}.z,{};", coord_vec, dref_value); | ||
| 484 | args = fmt::to_string(coord_vec); | ||
| 485 | break; | ||
| 486 | case TextureType::ColorArray2D: | ||
| 487 | case TextureType::ColorCube: | ||
| 488 | ctx.Add("MOV.F {}.w,{};", coord_vec, dref_value); | ||
| 489 | args = fmt::to_string(coord_vec); | ||
| 490 | break; | ||
| 491 | case TextureType::ColorArrayCube: | ||
| 492 | args = fmt::format("{},{}", coord_vec, dref_value); | ||
| 493 | break; | ||
| 494 | default: | ||
| 495 | throw NotImplementedException("Invalid type {}", info.type.Value()); | ||
| 496 | } | ||
| 497 | if (offset2.IsEmpty()) { | ||
| 498 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 499 | ctx.Add("TXG.F{} {},{},{},{}{};", sparse_mod, ret, args, texture, type, offset_vec); | ||
| 500 | } else { | ||
| 501 | SwizzleOffsets(ctx, off_x.reg, off_y.reg, offset, offset2); | ||
| 502 | ctx.Add("TXGO.F{} {},{},{},{},{},{};", sparse_mod, ret, args, off_x.reg, off_y.reg, texture, | ||
| 503 | type); | ||
| 504 | } | ||
| 505 | StoreSparse(ctx, sparse_inst); | ||
| 506 | } | ||
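| | |||
| | // Unlike the plain gather above, no component swizzle is applied to the | ||
| | // texture operand: depth-compare gathers return the comparison results. | ||
| | // The reference is packed into the first free coordinate component, or | ||
| | // passed as a separate operand for ColorArrayCube, hence the args string. | ||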
| 507 | |||
| 508 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 509 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { | ||
| 510 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 511 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 512 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 513 | const std::string_view type{TextureType(info)}; | ||
| 514 | const std::string texture{Texture(ctx, info, index)}; | ||
| 515 | const std::string offset_vec{Offset(ctx, offset)}; | ||
| 516 | const auto [coord_vec, coord_alloc]{Coord(ctx, coord)}; | ||
| 517 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 518 | if (info.type == TextureType::Buffer) { | ||
| 519 | ctx.Add("TXF.F{} {},{},{},{}{};", sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 520 | } else if (ms.type != Type::Void) { | ||
| 521 | ctx.Add("MOV.S {}.w,{};" | ||
| 522 | "TXFMS.F{} {},{},{},{}{};", | ||
| 523 | coord_vec, ms, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 524 | } else { | ||
| 525 | ctx.Add("MOV.S {}.w,{};" | ||
| 526 | "TXF.F{} {},{},{},{}{};", | ||
| 527 | coord_vec, lod, sparse_mod, ret, coord_vec, texture, type, offset_vec); | ||
| 528 | } | ||
| 529 | StoreSparse(ctx, sparse_inst); | ||
| 530 | } | ||
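| | |||
| | // Fetches are integer-addressed. Buffers index directly; other types pack | ||
| | // either the LOD (TXF) or the sample index (TXFMS) into the .w component. | ||
| | // Sketch for a 2D fetch at LOD 0 (register names hypothetical): | ||
| | //   MOV.S R0.w,0; | ||
| | //   TXF.F R1,R0,texture[0],2D; | ||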
| 531 | |||
| 532 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | ScalarS32 lod) { | ||
| 534 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 535 | const std::string texture{Texture(ctx, info, index)}; | ||
| 536 | const std::string_view type{TextureType(info)}; | ||
| 537 | ctx.Add("TXQ {},{},{},{};", inst, lod, texture, type); | ||
| 538 | } | ||
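| | |||
| | // TXQ queries the dimensions of the bound texture; the LOD operand | ||
| | // selects the mip level whose size is returned. | ||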
| 539 | |||
| 540 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 541 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 542 | const std::string texture{Texture(ctx, info, index)}; | ||
| 543 | const std::string_view type{TextureType(info)}; | ||
| 544 | ctx.Add("LOD.F {},{},{},{};", inst, coord, texture, type); | ||
| 545 | } | ||
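| | |||
| | // LOD.F returns the level-of-detail that an implicit-LOD sample at these | ||
| | // coordinates would use, analogous to GLSL's textureQueryLod. | ||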
| 546 | |||
| 547 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 549 | const IR::Value& offset, const IR::Value& lod_clamp) { | ||
| 550 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 551 | ScopedRegister dpdx, dpdy; | ||
| 552 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | ||
| 553 | if (multi_component) { | ||
| 554 | // Allocate these early to avoid aliasing other registers | ||
| 555 | dpdx = ScopedRegister{ctx.reg_alloc}; | ||
| 556 | dpdy = ScopedRegister{ctx.reg_alloc}; | ||
| 557 | } | ||
| 558 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 559 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 560 | const std::string_view type{TextureType(info)}; | ||
| 561 | const std::string texture{Texture(ctx, info, index)}; | ||
| 562 | const std::string offset_vec{GradOffset(offset)}; | ||
| 563 | const Register coord_vec{ctx.reg_alloc.Consume(coord)}; | ||
| 564 | const Register derivatives_vec{ctx.reg_alloc.Consume(derivatives)}; | ||
| 565 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 566 | if (multi_component) { | ||
| 567 | ctx.Add("MOV.F {}.x,{}.x;" | ||
| 568 | "MOV.F {}.y,{}.z;" | ||
| 569 | "MOV.F {}.x,{}.y;" | ||
| 570 | "MOV.F {}.y,{}.w;", | ||
| 571 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | ||
| 572 | dpdy.reg, derivatives_vec); | ||
| 573 | if (info.has_lod_clamp) { | ||
| 574 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | ||
| 575 | ctx.Add("MOV.F {}.w,{};" | ||
| 576 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | ||
| 577 | dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 578 | texture, type, offset_vec); | ||
| 579 | } else { | ||
| 580 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | ||
| 581 | texture, type, offset_vec); | ||
| 582 | } | ||
| 583 | } else { | ||
| 584 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | ||
| 585 | derivatives_vec, texture, type, offset_vec); | ||
| 586 | } | ||
| 587 | StoreSparse(ctx, sparse_inst); | ||
| 588 | } | ||
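| | |||
| | // The IR packs the derivatives interleaved as (dPdx.x, dPdy.x, dPdx.y, | ||
| | // dPdy.y), so the multi-component path de-interleaves them into the dpdx | ||
| | // and dpdy registers before TXD; an optional LOD clamp rides in dpdy.w | ||
| | // with the .LODCLAMP form. Single-component gradients feed the packed | ||
| | // vector's .x/.y to TXD directly. | ||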
| 589 | |||
| 590 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { | ||
| 591 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 592 | const auto sparse_inst{PrepareSparse(inst)}; | ||
| 593 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 594 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | ||
| 595 | const std::string_view type{TextureType(info)}; | ||
| 596 | const std::string image{Image(ctx, info, index)}; | ||
| 597 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 598 | ctx.Add("LOADIM.{}{} {},{},{},{};", format, sparse_mod, ret, coord, image, type); | ||
| 599 | StoreSparse(ctx, sparse_inst); | ||
| 600 | } | ||
| 601 | |||
| 602 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 603 | Register color) { | ||
| 604 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| 605 | const std::string_view format{FormatStorage(info.image_format)}; | ||
| 606 | const std::string_view type{TextureType(info)}; | ||
| 607 | const std::string image{Image(ctx, info, index)}; | ||
| 608 | ctx.Add("STOREIM.{} {},{},{},{};", format, image, color, coord, type); | ||
| 609 | } | ||
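| | |||
| | // LOADIM and STOREIM access the image directly, with the storage format | ||
| | // encoded in the opcode suffix. Sketch (format and registers hypothetical): | ||
| | //   LOADIM.U32 R0,R1,image[0],2D; | ||
| | //   STOREIM.U32 image[0],R0,R1,2D; | ||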
| 610 | |||
| 611 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 612 | ScalarU32 value) { | ||
| 613 | ImageAtomic(ctx, inst, index, coord, value, "ADD.U32"); | ||
| 614 | } | ||
| 615 | |||
| 616 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 617 | ScalarS32 value) { | ||
| 618 | ImageAtomic(ctx, inst, index, coord, value, "MIN.S32"); | ||
| 619 | } | ||
| 620 | |||
| 621 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 622 | ScalarU32 value) { | ||
| 623 | ImageAtomic(ctx, inst, index, coord, value, "MIN.U32"); | ||
| 624 | } | ||
| 625 | |||
| 626 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 627 | ScalarS32 value) { | ||
| 628 | ImageAtomic(ctx, inst, index, coord, value, "MAX.S32"); | ||
| 629 | } | ||
| 630 | |||
| 631 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 632 | ScalarU32 value) { | ||
| 633 | ImageAtomic(ctx, inst, index, coord, value, "MAX.U32"); | ||
| 634 | } | ||
| 635 | |||
| 636 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 637 | ScalarU32 value) { | ||
| 638 | ImageAtomic(ctx, inst, index, coord, value, "IWRAP.U32"); | ||
| 639 | } | ||
| 640 | |||
| 641 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 642 | ScalarU32 value) { | ||
| 643 | ImageAtomic(ctx, inst, index, coord, value, "DWRAP.U32"); | ||
| 644 | } | ||
| 645 | |||
| 646 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 647 | ScalarU32 value) { | ||
| 648 | ImageAtomic(ctx, inst, index, coord, value, "AND.U32"); | ||
| 649 | } | ||
| 650 | |||
| 651 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 652 | ScalarU32 value) { | ||
| 653 | ImageAtomic(ctx, inst, index, coord, value, "OR.U32"); | ||
| 654 | } | ||
| 655 | |||
| 656 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 657 | ScalarU32 value) { | ||
| 658 | ImageAtomic(ctx, inst, index, coord, value, "XOR.U32"); | ||
| 659 | } | ||
| 660 | |||
| 661 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 662 | Register coord, ScalarU32 value) { | ||
| 663 | ImageAtomic(ctx, inst, index, coord, value, "EXCH.U32"); | ||
| 664 | } | ||
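| | |||
| | // Every wrapper above funnels into the ImageAtomic helper defined earlier | ||
| | // in this file; a minimal sketch of its assumed shape: | ||
| | //   template <typename T> | ||
| | //   void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| | //                    Register coord, T value, std::string_view op) { | ||
| | //       const auto info{inst.Flags<IR::TextureInstInfo>()}; | ||
| | //       const std::string image{Image(ctx, info, index)}; | ||
| | //       const std::string_view type{TextureType(info)}; | ||
| | //       ctx.Add("ATOMIM.{} {},{},{},{},{};", op, inst, value, coord, image, type); | ||
| | //   } | ||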
| 665 | |||
| 666 | void EmitBindlessImageSampleImplicitLod(EmitContext&) { | ||
| 667 | throw LogicError("Unreachable instruction"); | ||
| 668 | } | ||
| 669 | |||
| 670 | void EmitBindlessImageSampleExplicitLod(EmitContext&) { | ||
| 671 | throw LogicError("Unreachable instruction"); | ||
| 672 | } | ||
| 673 | |||
| 674 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&) { | ||
| 675 | throw LogicError("Unreachable instruction"); | ||
| 676 | } | ||
| 677 | |||
| 678 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&) { | ||
| 679 | throw LogicError("Unreachable instruction"); | ||
| 680 | } | ||
| 681 | |||
| 682 | void EmitBindlessImageGather(EmitContext&) { | ||
| 683 | throw LogicError("Unreachable instruction"); | ||
| 684 | } | ||
| 685 | |||
| 686 | void EmitBindlessImageGatherDref(EmitContext&) { | ||
| 687 | throw LogicError("Unreachable instruction"); | ||
| 688 | } | ||
| 689 | |||
| 690 | void EmitBindlessImageFetch(EmitContext&) { | ||
| 691 | throw LogicError("Unreachable instruction"); | ||
| 692 | } | ||
| 693 | |||
| 694 | void EmitBindlessImageQueryDimensions(EmitContext&) { | ||
| 695 | throw LogicError("Unreachable instruction"); | ||
| 696 | } | ||
| 697 | |||
| 698 | void EmitBindlessImageQueryLod(EmitContext&) { | ||
| 699 | throw LogicError("Unreachable instruction"); | ||
| 700 | } | ||
| 701 | |||
| 702 | void EmitBindlessImageGradient(EmitContext&) { | ||
| 703 | throw LogicError("Unreachable instruction"); | ||
| 704 | } | ||
| 705 | |||
| 706 | void EmitBindlessImageRead(EmitContext&) { | ||
| 707 | throw LogicError("Unreachable instruction"); | ||
| 708 | } | ||
| 709 | |||
| 710 | void EmitBindlessImageWrite(EmitContext&) { | ||
| 711 | throw LogicError("Unreachable instruction"); | ||
| 712 | } | ||
| 713 | |||
| 714 | void EmitBoundImageSampleImplicitLod(EmitContext&) { | ||
| 715 | throw LogicError("Unreachable instruction"); | ||
| 716 | } | ||
| 717 | |||
| 718 | void EmitBoundImageSampleExplicitLod(EmitContext&) { | ||
| 719 | throw LogicError("Unreachable instruction"); | ||
| 720 | } | ||
| 721 | |||
| 722 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&) { | ||
| 723 | throw LogicError("Unreachable instruction"); | ||
| 724 | } | ||
| 725 | |||
| 726 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&) { | ||
| 727 | throw LogicError("Unreachable instruction"); | ||
| 728 | } | ||
| 729 | |||
| 730 | void EmitBoundImageGather(EmitContext&) { | ||
| 731 | throw LogicError("Unreachable instruction"); | ||
| 732 | } | ||
| 733 | |||
| 734 | void EmitBoundImageGatherDref(EmitContext&) { | ||
| 735 | throw LogicError("Unreachable instruction"); | ||
| 736 | } | ||
| 737 | |||
| 738 | void EmitBoundImageFetch(EmitContext&) { | ||
| 739 | throw LogicError("Unreachable instruction"); | ||
| 740 | } | ||
| 741 | |||
| 742 | void EmitBoundImageQueryDimensions(EmitContext&) { | ||
| 743 | throw LogicError("Unreachable instruction"); | ||
| 744 | } | ||
| 745 | |||
| 746 | void EmitBoundImageQueryLod(EmitContext&) { | ||
| 747 | throw LogicError("Unreachable instruction"); | ||
| 748 | } | ||
| 749 | |||
| 750 | void EmitBoundImageGradient(EmitContext&) { | ||
| 751 | throw LogicError("Unreachable instruction"); | ||
| 752 | } | ||
| 753 | |||
| 754 | void EmitBoundImageRead(EmitContext&) { | ||
| 755 | throw LogicError("Unreachable instruction"); | ||
| 756 | } | ||
| 757 | |||
| 758 | void EmitBoundImageWrite(EmitContext&) { | ||
| 759 | throw LogicError("Unreachable instruction"); | ||
| 760 | } | ||
| 761 | |||
| 762 | void EmitBindlessImageAtomicIAdd32(EmitContext&) { | ||
| 763 | throw LogicError("Unreachable instruction"); | ||
| 764 | } | ||
| 765 | |||
| 766 | void EmitBindlessImageAtomicSMin32(EmitContext&) { | ||
| 767 | throw LogicError("Unreachable instruction"); | ||
| 768 | } | ||
| 769 | |||
| 770 | void EmitBindlessImageAtomicUMin32(EmitContext&) { | ||
| 771 | throw LogicError("Unreachable instruction"); | ||
| 772 | } | ||
| 773 | |||
| 774 | void EmitBindlessImageAtomicSMax32(EmitContext&) { | ||
| 775 | throw LogicError("Unreachable instruction"); | ||
| 776 | } | ||
| 777 | |||
| 778 | void EmitBindlessImageAtomicUMax32(EmitContext&) { | ||
| 779 | throw LogicError("Unreachable instruction"); | ||
| 780 | } | ||
| 781 | |||
| 782 | void EmitBindlessImageAtomicInc32(EmitContext&) { | ||
| 783 | throw LogicError("Unreachable instruction"); | ||
| 784 | } | ||
| 785 | |||
| 786 | void EmitBindlessImageAtomicDec32(EmitContext&) { | ||
| 787 | throw LogicError("Unreachable instruction"); | ||
| 788 | } | ||
| 789 | |||
| 790 | void EmitBindlessImageAtomicAnd32(EmitContext&) { | ||
| 791 | throw LogicError("Unreachable instruction"); | ||
| 792 | } | ||
| 793 | |||
| 794 | void EmitBindlessImageAtomicOr32(EmitContext&) { | ||
| 795 | throw LogicError("Unreachable instruction"); | ||
| 796 | } | ||
| 797 | |||
| 798 | void EmitBindlessImageAtomicXor32(EmitContext&) { | ||
| 799 | throw LogicError("Unreachable instruction"); | ||
| 800 | } | ||
| 801 | |||
| 802 | void EmitBindlessImageAtomicExchange32(EmitContext&) { | ||
| 803 | throw LogicError("Unreachable instruction"); | ||
| 804 | } | ||
| 805 | |||
| 806 | void EmitBoundImageAtomicIAdd32(EmitContext&) { | ||
| 807 | throw LogicError("Unreachable instruction"); | ||
| 808 | } | ||
| 809 | |||
| 810 | void EmitBoundImageAtomicSMin32(EmitContext&) { | ||
| 811 | throw LogicError("Unreachable instruction"); | ||
| 812 | } | ||
| 813 | |||
| 814 | void EmitBoundImageAtomicUMin32(EmitContext&) { | ||
| 815 | throw LogicError("Unreachable instruction"); | ||
| 816 | } | ||
| 817 | |||
| 818 | void EmitBoundImageAtomicSMax32(EmitContext&) { | ||
| 819 | throw LogicError("Unreachable instruction"); | ||
| 820 | } | ||
| 821 | |||
| 822 | void EmitBoundImageAtomicUMax32(EmitContext&) { | ||
| 823 | throw LogicError("Unreachable instruction"); | ||
| 824 | } | ||
| 825 | |||
| 826 | void EmitBoundImageAtomicInc32(EmitContext&) { | ||
| 827 | throw LogicError("Unreachable instruction"); | ||
| 828 | } | ||
| 829 | |||
| 830 | void EmitBoundImageAtomicDec32(EmitContext&) { | ||
| 831 | throw LogicError("Unreachable instruction"); | ||
| 832 | } | ||
| 833 | |||
| 834 | void EmitBoundImageAtomicAnd32(EmitContext&) { | ||
| 835 | throw LogicError("Unreachable instruction"); | ||
| 836 | } | ||
| 837 | |||
| 838 | void EmitBoundImageAtomicOr32(EmitContext&) { | ||
| 839 | throw LogicError("Unreachable instruction"); | ||
| 840 | } | ||
| 841 | |||
| 842 | void EmitBoundImageAtomicXor32(EmitContext&) { | ||
| 843 | throw LogicError("Unreachable instruction"); | ||
| 844 | } | ||
| 845 | |||
| 846 | void EmitBoundImageAtomicExchange32(EmitContext&) { | ||
| 847 | throw LogicError("Unreachable instruction"); | ||
| 848 | } | ||
| 849 | |||
| 850 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h new file mode 100644 index 000000000..12afda43b --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_instructions.h | |||
| @@ -0,0 +1,625 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 9 | |||
| 10 | namespace Shader::IR { | ||
| 11 | enum class Attribute : u64; | ||
| 12 | enum class Patch : u64; | ||
| 13 | class Inst; | ||
| 14 | class Value; | ||
| 15 | } // namespace Shader::IR | ||
| 16 | |||
| 17 | namespace Shader::Backend::GLASM { | ||
| 18 | |||
| 19 | class EmitContext; | ||
| 20 | |||
| 21 | // Microinstruction emitters | ||
| 22 | void EmitPhi(EmitContext& ctx, IR::Inst& inst); | ||
| 23 | void EmitVoid(EmitContext& ctx); | ||
| 24 | void EmitIdentity(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 25 | void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 26 | void EmitReference(EmitContext&, const IR::Value& value); | ||
| 27 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi, const IR::Value& value); | ||
| 28 | void EmitJoin(EmitContext& ctx); | ||
| 29 | void EmitDemoteToHelperInvocation(EmitContext& ctx); | ||
| 30 | void EmitBarrier(EmitContext& ctx); | ||
| 31 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx); | ||
| 32 | void EmitDeviceMemoryBarrier(EmitContext& ctx); | ||
| 33 | void EmitPrologue(EmitContext& ctx); | ||
| 34 | void EmitEpilogue(EmitContext& ctx); | ||
| 35 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream); | ||
| 36 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||
| 37 | void EmitGetRegister(EmitContext& ctx); | ||
| 38 | void EmitSetRegister(EmitContext& ctx); | ||
| 39 | void EmitGetPred(EmitContext& ctx); | ||
| 40 | void EmitSetPred(EmitContext& ctx); | ||
| 41 | void EmitSetGotoVariable(EmitContext& ctx); | ||
| 42 | void EmitGetGotoVariable(EmitContext& ctx); | ||
| 43 | void EmitSetIndirectBranchVariable(EmitContext& ctx); | ||
| 44 | void EmitGetIndirectBranchVariable(EmitContext& ctx); | ||
| 45 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 46 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 47 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 48 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 49 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 50 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 51 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset); | ||
| 52 | void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr, ScalarU32 vertex); | ||
| 53 | void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, ScalarF32 value, ScalarU32 vertex); | ||
| 54 | void EmitGetAttributeIndexed(EmitContext& ctx, IR::Inst& inst, ScalarS32 offset, ScalarU32 vertex); | ||
| 55 | void EmitSetAttributeIndexed(EmitContext& ctx, ScalarU32 offset, ScalarF32 value, ScalarU32 vertex); | ||
| 56 | void EmitGetPatch(EmitContext& ctx, IR::Inst& inst, IR::Patch patch); | ||
| 57 | void EmitSetPatch(EmitContext& ctx, IR::Patch patch, ScalarF32 value); | ||
| 58 | void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, ScalarF32 value); | ||
| 59 | void EmitSetSampleMask(EmitContext& ctx, ScalarS32 value); | ||
| 60 | void EmitSetFragDepth(EmitContext& ctx, ScalarF32 value); | ||
| 61 | void EmitGetZFlag(EmitContext& ctx); | ||
| 62 | void EmitGetSFlag(EmitContext& ctx); | ||
| 63 | void EmitGetCFlag(EmitContext& ctx); | ||
| 64 | void EmitGetOFlag(EmitContext& ctx); | ||
| 65 | void EmitSetZFlag(EmitContext& ctx); | ||
| 66 | void EmitSetSFlag(EmitContext& ctx); | ||
| 67 | void EmitSetCFlag(EmitContext& ctx); | ||
| 68 | void EmitSetOFlag(EmitContext& ctx); | ||
| 69 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst); | ||
| 70 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 71 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst); | ||
| 72 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst); | ||
| 73 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst); | ||
| 74 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst); | ||
| 75 | void EmitLoadLocal(EmitContext& ctx, IR::Inst& inst, ScalarU32 word_offset); | ||
| 76 | void EmitWriteLocal(EmitContext& ctx, ScalarU32 word_offset, ScalarU32 value); | ||
| 77 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst); | ||
| 78 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst); | ||
| 79 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst); | ||
| 80 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst); | ||
| 81 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst); | ||
| 82 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 83 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 84 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 85 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 86 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 87 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 88 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address); | ||
| 89 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value); | ||
| 90 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value); | ||
| 91 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value); | ||
| 92 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value); | ||
| 93 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value); | ||
| 94 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value); | ||
| 95 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value); | ||
| 96 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 97 | ScalarU32 offset); | ||
| 98 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 99 | ScalarU32 offset); | ||
| 100 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 101 | ScalarU32 offset); | ||
| 102 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 103 | ScalarU32 offset); | ||
| 104 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 105 | ScalarU32 offset); | ||
| 106 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 107 | ScalarU32 offset); | ||
| 108 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | ScalarU32 offset); | ||
| 110 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 111 | ScalarU32 value); | ||
| 112 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 113 | ScalarS32 value); | ||
| 114 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 115 | ScalarU32 value); | ||
| 116 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 117 | ScalarS32 value); | ||
| 118 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 119 | ScalarU32 value); | ||
| 120 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 121 | Register value); | ||
| 122 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 123 | Register value); | ||
| 124 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 125 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 126 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 127 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 128 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 129 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 130 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset); | ||
| 131 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 132 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 133 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value); | ||
| 134 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 135 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value); | ||
| 136 | void EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 137 | const IR::Value& e2); | ||
| 138 | void EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 139 | const IR::Value& e2, const IR::Value& e3); | ||
| 140 | void EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 141 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 142 | void EmitCompositeExtractU32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 143 | void EmitCompositeExtractU32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 144 | void EmitCompositeExtractU32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 145 | void EmitCompositeInsertU32x2(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 146 | void EmitCompositeInsertU32x3(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 147 | void EmitCompositeInsertU32x4(EmitContext& ctx, Register composite, ScalarU32 object, u32 index); | ||
| 148 | void EmitCompositeConstructF16x2(EmitContext& ctx, Register e1, Register e2); | ||
| 149 | void EmitCompositeConstructF16x3(EmitContext& ctx, Register e1, Register e2, Register e3); | ||
| 150 | void EmitCompositeConstructF16x4(EmitContext& ctx, Register e1, Register e2, Register e3, | ||
| 151 | Register e4); | ||
| 152 | void EmitCompositeExtractF16x2(EmitContext& ctx, Register composite, u32 index); | ||
| 153 | void EmitCompositeExtractF16x3(EmitContext& ctx, Register composite, u32 index); | ||
| 154 | void EmitCompositeExtractF16x4(EmitContext& ctx, Register composite, u32 index); | ||
| 155 | void EmitCompositeInsertF16x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 156 | void EmitCompositeInsertF16x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 157 | void EmitCompositeInsertF16x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 158 | void EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 159 | const IR::Value& e2); | ||
| 160 | void EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 161 | const IR::Value& e2, const IR::Value& e3); | ||
| 162 | void EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst& inst, const IR::Value& e1, | ||
| 163 | const IR::Value& e2, const IR::Value& e3, const IR::Value& e4); | ||
| 164 | void EmitCompositeExtractF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 165 | void EmitCompositeExtractF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 166 | void EmitCompositeExtractF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, u32 index); | ||
| 167 | void EmitCompositeInsertF32x2(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 168 | ScalarF32 object, u32 index); | ||
| 169 | void EmitCompositeInsertF32x3(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 170 | ScalarF32 object, u32 index); | ||
| 171 | void EmitCompositeInsertF32x4(EmitContext& ctx, IR::Inst& inst, Register composite, | ||
| 172 | ScalarF32 object, u32 index); | ||
| 173 | void EmitCompositeConstructF64x2(EmitContext& ctx); | ||
| 174 | void EmitCompositeConstructF64x3(EmitContext& ctx); | ||
| 175 | void EmitCompositeConstructF64x4(EmitContext& ctx); | ||
| 176 | void EmitCompositeExtractF64x2(EmitContext& ctx); | ||
| 177 | void EmitCompositeExtractF64x3(EmitContext& ctx); | ||
| 178 | void EmitCompositeExtractF64x4(EmitContext& ctx); | ||
| 179 | void EmitCompositeInsertF64x2(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 180 | void EmitCompositeInsertF64x3(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 181 | void EmitCompositeInsertF64x4(EmitContext& ctx, Register composite, Register object, u32 index); | ||
| 182 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 183 | ScalarS32 false_value); | ||
| 184 | void EmitSelectU8(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 185 | void EmitSelectU16(EmitContext& ctx, ScalarS32 cond, ScalarS32 true_value, ScalarS32 false_value); | ||
| 186 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 187 | ScalarS32 false_value); | ||
| 188 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 189 | Register false_value); | ||
| 190 | void EmitSelectF16(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 191 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 192 | ScalarS32 false_value); | ||
| 193 | void EmitSelectF64(EmitContext& ctx, ScalarS32 cond, Register true_value, Register false_value); | ||
| 194 | void EmitBitCastU16F16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 195 | void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 196 | void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 197 | void EmitBitCastF16U16(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 198 | void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 199 | void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, const IR::Value& value); | ||
| 200 | void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 201 | void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 202 | void EmitPackFloat2x16(EmitContext& ctx, Register value); | ||
| 203 | void EmitUnpackFloat2x16(EmitContext& ctx, Register value); | ||
| 204 | void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 205 | void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 206 | void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 207 | void EmitUnpackDouble2x32(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 208 | void EmitGetZeroFromOp(EmitContext& ctx); | ||
| 209 | void EmitGetSignFromOp(EmitContext& ctx); | ||
| 210 | void EmitGetCarryFromOp(EmitContext& ctx); | ||
| 211 | void EmitGetOverflowFromOp(EmitContext& ctx); | ||
| 212 | void EmitGetSparseFromOp(EmitContext& ctx); | ||
| 213 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 214 | void EmitFPAbs16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 215 | void EmitFPAbs32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 216 | void EmitFPAbs64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 217 | void EmitFPAdd16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 218 | void EmitFPAdd32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 219 | void EmitFPAdd64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 220 | void EmitFPFma16(EmitContext& ctx, IR::Inst& inst, Register a, Register b, Register c); | ||
| 221 | void EmitFPFma32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b, ScalarF32 c); | ||
| 222 | void EmitFPFma64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b, ScalarF64 c); | ||
| 223 | void EmitFPMax32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 224 | void EmitFPMax64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 225 | void EmitFPMin32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 226 | void EmitFPMin64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 227 | void EmitFPMul16(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 228 | void EmitFPMul32(EmitContext& ctx, IR::Inst& inst, ScalarF32 a, ScalarF32 b); | ||
| 229 | void EmitFPMul64(EmitContext& ctx, IR::Inst& inst, ScalarF64 a, ScalarF64 b); | ||
| 230 | void EmitFPNeg16(EmitContext& ctx, Register value); | ||
| 231 | void EmitFPNeg32(EmitContext& ctx, IR::Inst& inst, ScalarRegister value); | ||
| 232 | void EmitFPNeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 233 | void EmitFPSin(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 234 | void EmitFPCos(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 235 | void EmitFPExp2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 236 | void EmitFPLog2(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 237 | void EmitFPRecip32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 238 | void EmitFPRecip64(EmitContext& ctx, Register value); | ||
| 239 | void EmitFPRecipSqrt32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 240 | void EmitFPRecipSqrt64(EmitContext& ctx, Register value); | ||
| 241 | void EmitFPSqrt(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 242 | void EmitFPSaturate16(EmitContext& ctx, Register value); | ||
| 243 | void EmitFPSaturate32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 244 | void EmitFPSaturate64(EmitContext& ctx, Register value); | ||
| 245 | void EmitFPClamp16(EmitContext& ctx, Register value, Register min_value, Register max_value); | ||
| 246 | void EmitFPClamp32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value, ScalarF32 min_value, | ||
| 247 | ScalarF32 max_value); | ||
| 248 | void EmitFPClamp64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value, ScalarF64 min_value, | ||
| 249 | ScalarF64 max_value); | ||
| 250 | void EmitFPRoundEven16(EmitContext& ctx, Register value); | ||
| 251 | void EmitFPRoundEven32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 252 | void EmitFPRoundEven64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 253 | void EmitFPFloor16(EmitContext& ctx, Register value); | ||
| 254 | void EmitFPFloor32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 255 | void EmitFPFloor64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 256 | void EmitFPCeil16(EmitContext& ctx, Register value); | ||
| 257 | void EmitFPCeil32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 258 | void EmitFPCeil64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 259 | void EmitFPTrunc16(EmitContext& ctx, Register value); | ||
| 260 | void EmitFPTrunc32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 261 | void EmitFPTrunc64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 262 | void EmitFPOrdEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 263 | void EmitFPOrdEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 264 | void EmitFPOrdEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 265 | void EmitFPUnordEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 266 | void EmitFPUnordEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 267 | void EmitFPUnordEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 268 | void EmitFPOrdNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 269 | void EmitFPOrdNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 270 | void EmitFPOrdNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 271 | void EmitFPUnordNotEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 272 | void EmitFPUnordNotEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 273 | void EmitFPUnordNotEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 274 | void EmitFPOrdLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 275 | void EmitFPOrdLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 276 | void EmitFPOrdLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 277 | void EmitFPUnordLessThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 278 | void EmitFPUnordLessThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 279 | void EmitFPUnordLessThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 280 | void EmitFPOrdGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 281 | void EmitFPOrdGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 282 | void EmitFPOrdGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 283 | void EmitFPUnordGreaterThan16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 284 | void EmitFPUnordGreaterThan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 285 | void EmitFPUnordGreaterThan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 286 | void EmitFPOrdLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 287 | void EmitFPOrdLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 288 | void EmitFPOrdLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 289 | void EmitFPUnordLessThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 290 | void EmitFPUnordLessThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 291 | void EmitFPUnordLessThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 292 | void EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 293 | void EmitFPOrdGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 294 | void EmitFPOrdGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 295 | void EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Register lhs, Register rhs); | ||
| 296 | void EmitFPUnordGreaterThanEqual32(EmitContext& ctx, IR::Inst& inst, ScalarF32 lhs, ScalarF32 rhs); | ||
| 297 | void EmitFPUnordGreaterThanEqual64(EmitContext& ctx, IR::Inst& inst, ScalarF64 lhs, ScalarF64 rhs); | ||
| 298 | void EmitFPIsNan16(EmitContext& ctx, Register value); | ||
| 299 | void EmitFPIsNan32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 300 | void EmitFPIsNan64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 301 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 302 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 303 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 304 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b); | ||
| 305 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 306 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 307 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 308 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 309 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 310 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, ScalarU32 shift); | ||
| 311 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift); | ||
| 312 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 313 | ScalarU32 shift); | ||
| 314 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift); | ||
| 315 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 316 | ScalarS32 shift); | ||
| 317 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 318 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 319 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 320 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 321 | ScalarS32 offset, ScalarS32 count); | ||
| 322 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 323 | ScalarS32 count); | ||
| 324 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 325 | ScalarU32 count); | ||
| 326 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 327 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 328 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 329 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 330 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 331 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 332 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 333 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 334 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b); | ||
| 335 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max); | ||
| 336 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max); | ||
| 337 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 338 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 339 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 340 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 341 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 342 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 343 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 344 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 345 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs); | ||
| 346 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs); | ||
| 347 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 348 | ScalarU32 value); | ||
| 349 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 350 | ScalarS32 value); | ||
| 351 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 352 | ScalarU32 value); | ||
| 353 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 354 | ScalarS32 value); | ||
| 355 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 356 | ScalarU32 value); | ||
| 357 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 358 | ScalarU32 value); | ||
| 359 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 360 | ScalarU32 value); | ||
| 361 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 362 | ScalarU32 value); | ||
| 363 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 364 | ScalarU32 value); | ||
| 365 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 366 | ScalarU32 value); | ||
| 367 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 368 | ScalarU32 value); | ||
| 369 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 370 | Register value); | ||
| 371 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 372 | ScalarU32 offset, ScalarU32 value); | ||
| 373 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 374 | ScalarU32 offset, ScalarS32 value); | ||
| 375 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 376 | ScalarU32 offset, ScalarU32 value); | ||
| 377 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 378 | ScalarU32 offset, ScalarS32 value); | ||
| 379 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, ScalarU32 value); | ||
| 381 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 382 | ScalarU32 offset, ScalarU32 value); | ||
| 383 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 384 | ScalarU32 offset, ScalarU32 value); | ||
| 385 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 386 | ScalarU32 offset, ScalarU32 value); | ||
| 387 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 388 | ScalarU32 offset, ScalarU32 value); | ||
| 389 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, ScalarU32 value); | ||
| 391 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 392 | ScalarU32 offset, ScalarU32 value); | ||
| 393 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 394 | ScalarU32 offset, Register value); | ||
| 395 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 396 | ScalarU32 offset, Register value); | ||
| 397 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 398 | ScalarU32 offset, Register value); | ||
| 399 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value); | ||
| 401 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 402 | ScalarU32 offset, Register value); | ||
| 403 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 404 | ScalarU32 offset, Register value); | ||
| 405 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 406 | ScalarU32 offset, Register value); | ||
| 407 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 408 | ScalarU32 offset, Register value); | ||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value); | ||
| 411 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 412 | ScalarU32 offset, ScalarF32 value); | ||
| 413 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 414 | ScalarU32 offset, Register value); | ||
| 415 | void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 416 | ScalarU32 offset, Register value); | ||
| 417 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 418 | ScalarU32 offset, Register value); | ||
| 419 | void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value); | ||
| 421 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 422 | ScalarU32 offset, Register value); | ||
| 423 | void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 424 | ScalarU32 offset, Register value); | ||
| 425 | void EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 426 | void EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 427 | void EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 428 | void EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 429 | void EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 430 | void EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 431 | void EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 432 | void EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 433 | void EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 434 | void EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 435 | void EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 436 | void EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 437 | void EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 438 | void EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 439 | void EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 440 | void EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 441 | void EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 442 | void EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 443 | void EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 444 | void EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 445 | void EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 446 | void EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 447 | void EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 448 | void EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 449 | void EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 450 | void EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 451 | void EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 452 | void EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 453 | void EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 454 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 455 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 456 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b); | ||
| 457 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 458 | void EmitConvertS16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 459 | void EmitConvertS16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 460 | void EmitConvertS16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 461 | void EmitConvertS32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 462 | void EmitConvertS32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 463 | void EmitConvertS32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 464 | void EmitConvertS64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 465 | void EmitConvertS64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 466 | void EmitConvertS64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 467 | void EmitConvertU16F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 468 | void EmitConvertU16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 469 | void EmitConvertU16F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 470 | void EmitConvertU32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 471 | void EmitConvertU32F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 472 | void EmitConvertU32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 473 | void EmitConvertU64F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 474 | void EmitConvertU64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 475 | void EmitConvertU64F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 476 | void EmitConvertU64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 477 | void EmitConvertU32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 478 | void EmitConvertF16F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 479 | void EmitConvertF32F16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 480 | void EmitConvertF32F64(EmitContext& ctx, IR::Inst& inst, ScalarF64 value); | ||
| 481 | void EmitConvertF64F32(EmitContext& ctx, IR::Inst& inst, ScalarF32 value); | ||
| 482 | void EmitConvertF16S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 483 | void EmitConvertF16S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 484 | void EmitConvertF16S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 485 | void EmitConvertF16S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 486 | void EmitConvertF16U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 487 | void EmitConvertF16U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 488 | void EmitConvertF16U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 489 | void EmitConvertF16U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 490 | void EmitConvertF32S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 491 | void EmitConvertF32S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 492 | void EmitConvertF32S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 493 | void EmitConvertF32S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 494 | void EmitConvertF32U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 495 | void EmitConvertF32U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 496 | void EmitConvertF32U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 497 | void EmitConvertF32U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 498 | void EmitConvertF64S8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 499 | void EmitConvertF64S16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 500 | void EmitConvertF64S32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value); | ||
| 501 | void EmitConvertF64S64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 502 | void EmitConvertF64U8(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 503 | void EmitConvertF64U16(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 504 | void EmitConvertF64U32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value); | ||
| 505 | void EmitConvertF64U64(EmitContext& ctx, IR::Inst& inst, Register value); | ||
| 506 | void EmitBindlessImageSampleImplicitLod(EmitContext&); | ||
| 507 | void EmitBindlessImageSampleExplicitLod(EmitContext&); | ||
| 508 | void EmitBindlessImageSampleDrefImplicitLod(EmitContext&); | ||
| 509 | void EmitBindlessImageSampleDrefExplicitLod(EmitContext&); | ||
| 510 | void EmitBindlessImageGather(EmitContext&); | ||
| 511 | void EmitBindlessImageGatherDref(EmitContext&); | ||
| 512 | void EmitBindlessImageFetch(EmitContext&); | ||
| 513 | void EmitBindlessImageQueryDimensions(EmitContext&); | ||
| 514 | void EmitBindlessImageQueryLod(EmitContext&); | ||
| 515 | void EmitBindlessImageGradient(EmitContext&); | ||
| 516 | void EmitBindlessImageRead(EmitContext&); | ||
| 517 | void EmitBindlessImageWrite(EmitContext&); | ||
| 518 | void EmitBoundImageSampleImplicitLod(EmitContext&); | ||
| 519 | void EmitBoundImageSampleExplicitLod(EmitContext&); | ||
| 520 | void EmitBoundImageSampleDrefImplicitLod(EmitContext&); | ||
| 521 | void EmitBoundImageSampleDrefExplicitLod(EmitContext&); | ||
| 522 | void EmitBoundImageGather(EmitContext&); | ||
| 523 | void EmitBoundImageGatherDref(EmitContext&); | ||
| 524 | void EmitBoundImageFetch(EmitContext&); | ||
| 525 | void EmitBoundImageQueryDimensions(EmitContext&); | ||
| 526 | void EmitBoundImageQueryLod(EmitContext&); | ||
| 527 | void EmitBoundImageGradient(EmitContext&); | ||
| 528 | void EmitBoundImageRead(EmitContext&); | ||
| 529 | void EmitBoundImageWrite(EmitContext&); | ||
| 530 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 531 | const IR::Value& coord, Register bias_lc, const IR::Value& offset); | ||
| 532 | void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 533 | const IR::Value& coord, ScalarF32 lod, const IR::Value& offset); | ||
| 534 | void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 535 | const IR::Value& coord, const IR::Value& dref, | ||
| 536 | const IR::Value& bias_lc, const IR::Value& offset); | ||
| 537 | void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 538 | const IR::Value& coord, const IR::Value& dref, | ||
| 539 | const IR::Value& lod, const IR::Value& offset); | ||
| 540 | void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 541 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2); | ||
| 542 | void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 543 | const IR::Value& coord, const IR::Value& offset, const IR::Value& offset2, | ||
| 544 | const IR::Value& dref); | ||
| 545 | void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 546 | const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms); | ||
| 547 | void EmitImageQueryDimensions(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 548 | ScalarS32 lod); | ||
| 549 | void EmitImageQueryLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 550 | void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 551 | const IR::Value& coord, const IR::Value& derivatives, | ||
| 552 | const IR::Value& offset, const IR::Value& lod_clamp); | ||
| 553 | void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord); | ||
| 554 | void EmitImageWrite(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 555 | Register color); | ||
| 556 | void EmitBindlessImageAtomicIAdd32(EmitContext&); | ||
| 557 | void EmitBindlessImageAtomicSMin32(EmitContext&); | ||
| 558 | void EmitBindlessImageAtomicUMin32(EmitContext&); | ||
| 559 | void EmitBindlessImageAtomicSMax32(EmitContext&); | ||
| 560 | void EmitBindlessImageAtomicUMax32(EmitContext&); | ||
| 561 | void EmitBindlessImageAtomicInc32(EmitContext&); | ||
| 562 | void EmitBindlessImageAtomicDec32(EmitContext&); | ||
| 563 | void EmitBindlessImageAtomicAnd32(EmitContext&); | ||
| 564 | void EmitBindlessImageAtomicOr32(EmitContext&); | ||
| 565 | void EmitBindlessImageAtomicXor32(EmitContext&); | ||
| 566 | void EmitBindlessImageAtomicExchange32(EmitContext&); | ||
| 567 | void EmitBoundImageAtomicIAdd32(EmitContext&); | ||
| 568 | void EmitBoundImageAtomicSMin32(EmitContext&); | ||
| 569 | void EmitBoundImageAtomicUMin32(EmitContext&); | ||
| 570 | void EmitBoundImageAtomicSMax32(EmitContext&); | ||
| 571 | void EmitBoundImageAtomicUMax32(EmitContext&); | ||
| 572 | void EmitBoundImageAtomicInc32(EmitContext&); | ||
| 573 | void EmitBoundImageAtomicDec32(EmitContext&); | ||
| 574 | void EmitBoundImageAtomicAnd32(EmitContext&); | ||
| 575 | void EmitBoundImageAtomicOr32(EmitContext&); | ||
| 576 | void EmitBoundImageAtomicXor32(EmitContext&); | ||
| 577 | void EmitBoundImageAtomicExchange32(EmitContext&); | ||
| 578 | void EmitImageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 579 | ScalarU32 value); | ||
| 580 | void EmitImageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 581 | ScalarS32 value); | ||
| 582 | void EmitImageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 583 | ScalarU32 value); | ||
| 584 | void EmitImageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 585 | ScalarS32 value); | ||
| 586 | void EmitImageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 587 | ScalarU32 value); | ||
| 588 | void EmitImageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 589 | ScalarU32 value); | ||
| 590 | void EmitImageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 591 | ScalarU32 value); | ||
| 592 | void EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 593 | ScalarU32 value); | ||
| 594 | void EmitImageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 595 | ScalarU32 value); | ||
| 596 | void EmitImageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord, | ||
| 597 | ScalarU32 value); | ||
| 598 | void EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | ||
| 599 | Register coord, ScalarU32 value); | ||
| 600 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst); | ||
| 601 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 602 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 603 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 604 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred); | ||
| 605 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst); | ||
| 606 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 607 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 608 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst); | ||
| 609 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst); | ||
| 610 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 611 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 612 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 613 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 614 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 615 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 616 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 617 | const IR::Value& clamp, const IR::Value& segmentation_mask); | ||
| 618 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 619 | ScalarU32 swizzle); | ||
| 620 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 621 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 622 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 623 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||
| 624 | |||
| 625 | } // namespace Shader::Backend::GLASM | ||
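The header above is deliberately a flat table of free functions, one per IR opcode. As a hedged, self-contained toy (the real dispatcher lives in emit_glasm.cpp, which is not part of this section, and `Opcode`, `Inst`, and `EmitContext` below are simplified stand-ins, not the real shader_recompiler types), dispatch over such a table typically looks like:

```cpp
#include <cstdio>

// Simplified stand-ins for the real shader_recompiler types.
enum class Opcode { IAdd32, BitwiseAnd32 };
struct EmitContext {};
struct Inst {
    Opcode opcode;
};

void EmitIAdd32(EmitContext&, Inst&) { std::puts("ADD.S ..."); }
void EmitBitwiseAnd32(EmitContext&, Inst&) { std::puts("AND.S ..."); }

// One switch maps every IR opcode to its Emit* free function.
void EmitInst(EmitContext& ctx, Inst& inst) {
    switch (inst.opcode) {
    case Opcode::IAdd32:
        return EmitIAdd32(ctx, inst);
    case Opcode::BitwiseAnd32:
        return EmitBitwiseAnd32(ctx, inst);
    }
}

int main() {
    EmitContext ctx;
    Inst add{Opcode::IAdd32};
    EmitInst(ctx, add);  // prints "ADD.S ..."
}
```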
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp new file mode 100644 index 000000000..f55c26b76 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | void BitwiseLogicalOp(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b, | ||
| 12 | std::string_view lop) { | ||
| 13 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 14 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 15 | if (zero) { | ||
| 16 | zero->Invalidate(); | ||
| 17 | } | ||
| 18 | if (sign) { | ||
| 19 | sign->Invalidate(); | ||
| 20 | } | ||
| 21 | if (zero || sign) { | ||
| 22 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 23 | } | ||
| 24 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 25 | ctx.Add("{}.S {}.x,{},{};", lop, ret, a, b); | ||
| 26 | if (zero) { | ||
| 27 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 28 | } | ||
| 29 | if (sign) { | ||
| 30 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
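A minimal standalone sketch of the flag-forwarding pattern in BitwiseLogicalOp: the zero and sign pseudo-ops are detached from the IR and materialized as explicit SEQ/SLT comparisons against the freshly written result. `EmitAndWithFlags` and the RZ/RS register names below are illustrative, not part of the backend:

```cpp
#include <fmt/core.h>
#include <string>

// Mirrors BitwiseLogicalOp for lop == "AND": emit the logical op, then one
// comparison per live pseudo-op flag.
std::string EmitAndWithFlags(const std::string& ret, const std::string& a,
                             const std::string& b, bool zero, bool sign) {
    std::string out{fmt::format("AND.S {}.x,{},{};", ret, a, b)};
    if (zero) {
        out += fmt::format("SEQ.S RZ,{},0;", ret);  // zero = (ret == 0) ? -1 : 0
    }
    if (sign) {
        out += fmt::format("SLT.S RS,{},0;", ret);  // sign = (ret < 0) ? -1 : 0
    }
    return out;
}

int main() {
    fmt::print("{}\n", EmitAndWithFlags("R0", "R1", "R2", true, false));
}
```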
| 34 | |||
| 35 | void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 36 | const std::array flags{ | ||
| 37 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), | ||
| 38 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), | ||
| 39 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), | ||
| 40 | inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), | ||
| 41 | }; | ||
| 42 | for (IR::Inst* const flag_inst : flags) { | ||
| 43 | if (flag_inst) { | ||
| 44 | flag_inst->Invalidate(); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | const bool cc{inst.HasAssociatedPseudoOperation()}; | ||
| 48 | const std::string_view cc_mod{cc ? ".CC" : ""}; | ||
| 49 | if (cc) { | ||
| 50 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 51 | } | ||
| 52 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
| 53 | ctx.Add("ADD.S{} {}.x,{},{};", cc_mod, ret, a, b); | ||
| 54 | if (!cc) { | ||
| 55 | return; | ||
| 56 | } | ||
| 57 | static constexpr std::array<std::string_view, 4> masks{"", "SF", "CF", "OF"}; | ||
| 58 | for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { | ||
| 59 | if (!flags[flag_index]) { | ||
| 60 | continue; | ||
| 61 | } | ||
| 62 | const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; | ||
| 63 | if (flag_index == 0) { | ||
| 64 | ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); | ||
| 65 | } else { | ||
| 66 | // We could use conditional execution here, but it's broken on Nvidia's compiler | ||
| 67 | ctx.Add("IF {}.x;" | ||
| 68 | "MOV.S {}.x,-1;" | ||
| 69 | "ELSE;" | ||
| 70 | "MOV.S {}.x,0;" | ||
| 71 | "ENDIF;", | ||
| 72 | masks[flag_index], flag_ret, flag_ret); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | } | ||
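For reference, the stream EmitIAdd32 produces when a carry consumer is attached (register names illustrative): ADD.S.CC updates the condition code, and the CF mask is then read back through an explicit IF/ELSE because, per the comment above, conditional execution is broken on Nvidia's compiler:

```cpp
#include <cstdio>

int main() {
    std::puts(
        "ADD.S.CC R0.x,R1,R2;"  // sum, updating condition codes
        "IF CF.x;"              // carry flag set?
        "MOV.S R3.x,-1;"        // carry pseudo-op register = true (-1)
        "ELSE;"
        "MOV.S R3.x,0;"         // carry pseudo-op register = false
        "ENDIF;");
}
```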
| 76 | |||
| 77 | void EmitIAdd64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 78 | ctx.LongAdd("ADD.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitISub32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 82 | ctx.Add("SUB.S {}.x,{},{};", inst, a, b); | ||
| 83 | } | ||
| 84 | |||
| 85 | void EmitISub64(EmitContext& ctx, IR::Inst& inst, Register a, Register b) { | ||
| 86 | ctx.LongAdd("SUB.S64 {}.x,{}.x,{}.x;", inst, a, b); | ||
| 87 | } | ||
| 88 | |||
| 89 | void EmitIMul32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 90 | ctx.Add("MUL.S {}.x,{},{};", inst, a, b); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitINeg32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 94 | if (value.type != Type::Register && static_cast<s32>(value.imm_u32) < 0) { | ||
| 95 | ctx.Add("MOV.S {},{};", inst, -static_cast<s32>(value.imm_u32)); | ||
| 96 | } else { | ||
| 97 | ctx.Add("MOV.S {},-{};", inst, value); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitINeg64(EmitContext& ctx, IR::Inst& inst, Register value) { | ||
| 102 | ctx.LongAdd("MOV.S64 {},-{};", inst, value); | ||
| 103 | } | ||
| 104 | |||
| 105 | void EmitIAbs32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 106 | ctx.Add("ABS.S {},{};", inst, value); | ||
| 107 | } | ||
| 108 | |||
| 109 | void EmitShiftLeftLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 110 | ctx.Add("SHL.U {}.x,{},{};", inst, base, shift); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitShiftLeftLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 114 | ScalarU32 shift) { | ||
| 115 | ctx.LongAdd("SHL.U64 {}.x,{},{};", inst, base, shift); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitShiftRightLogical32(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 shift) { | ||
| 119 | ctx.Add("SHR.U {}.x,{},{};", inst, base, shift); | ||
| 120 | } | ||
| 121 | |||
| 122 | void EmitShiftRightLogical64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 123 | ScalarU32 shift) { | ||
| 124 | ctx.LongAdd("SHR.U64 {}.x,{},{};", inst, base, shift); | ||
| 125 | } | ||
| 126 | |||
| 127 | void EmitShiftRightArithmetic32(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 shift) { | ||
| 128 | ctx.Add("SHR.S {}.x,{},{};", inst, base, shift); | ||
| 129 | } | ||
| 130 | |||
| 131 | void EmitShiftRightArithmetic64(EmitContext& ctx, IR::Inst& inst, ScalarRegister base, | ||
| 132 | ScalarS32 shift) { | ||
| 133 | ctx.LongAdd("SHR.S64 {}.x,{},{};", inst, base, shift); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitBitwiseAnd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 137 | BitwiseLogicalOp(ctx, inst, a, b, "AND"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitBitwiseOr32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 141 | BitwiseLogicalOp(ctx, inst, a, b, "OR"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitBitwiseXor32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 145 | BitwiseLogicalOp(ctx, inst, a, b, "XOR"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitBitFieldInsert(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 insert, | ||
| 149 | ScalarS32 offset, ScalarS32 count) { | ||
| 150 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 151 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 152 | ctx.Add("BFI.S {},{{{},{},0,0}},{},{};", ret, count, offset, insert, base); | ||
| 153 | } else { | ||
| 154 | ctx.Add("MOV.S RC.x,{};" | ||
| 155 | "MOV.S RC.y,{};" | ||
| 156 | "BFI.S {},RC,{},{};", | ||
| 157 | count, offset, ret, insert, base); | ||
| 158 | } | ||
| 159 | } | ||
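BFI (and BFE below) take the bitfield width and offset as the .x/.y components of a single vector operand. A small demo of the two formatting paths above, with illustrative register names: immediates fold into a literal `{count,offset,0,0}` vector, while register operands are staged through the RC scratch register first:

```cpp
#include <fmt/core.h>

int main() {
    // Immediate path: BFI.S ret,{count,offset,0,0},insert,base;
    fmt::print("{}\n",
               fmt::format("BFI.S {},{{{},{},0,0}},{},{};", "R0", 8, 4, "R1", "R2"));
    // Register path: stage count/offset into RC.x/RC.y before BFI reads RC.
    fmt::print("{}\n", fmt::format("MOV.S RC.x,{};MOV.S RC.y,{};BFI.S {},RC,{},{};",
                                   "R3", "R4", "R0", "R1", "R2"));
}
```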
| 160 | |||
| 161 | void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, ScalarS32 offset, | ||
| 162 | ScalarS32 count) { | ||
| 163 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 164 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 165 | ctx.Add("BFE.S {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 166 | } else { | ||
| 167 | ctx.Add("MOV.S RC.x,{};" | ||
| 168 | "MOV.S RC.y,{};" | ||
| 169 | "BFE.S {},RC,{};", | ||
| 170 | count, offset, ret, base); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, | ||
| 175 | ScalarU32 count) { | ||
| 176 | const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); | ||
| 177 | const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); | ||
| 178 | if (zero) { | ||
| 179 | zero->Invalidate(); | ||
| 180 | } | ||
| 181 | if (sign) { | ||
| 182 | sign->Invalidate(); | ||
| 183 | } | ||
| 184 | if (zero || sign) { | ||
| 185 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 186 | } | ||
| 187 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 188 | if (count.type != Type::Register && offset.type != Type::Register) { | ||
| 189 | ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); | ||
| 190 | } else { | ||
| 191 | ctx.Add("MOV.U RC.x,{};" | ||
| 192 | "MOV.U RC.y,{};" | ||
| 193 | "BFE.U {},RC,{};", | ||
| 194 | count, offset, ret, base); | ||
| 195 | } | ||
| 196 | if (zero) { | ||
| 197 | ctx.Add("SEQ.S {},{},0;", *zero, ret); | ||
| 198 | } | ||
| 199 | if (sign) { | ||
| 200 | ctx.Add("SLT.S {},{},0;", *sign, ret); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitBitReverse32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 205 | ctx.Add("BFR {},{};", inst, value); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitBitCount32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 209 | ctx.Add("BTC {},{};", inst, value); | ||
| 210 | } | ||
| 211 | |||
| 212 | void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 213 | ctx.Add("NOT.S {},{};", inst, value); | ||
| 214 | } | ||
| 215 | |||
| 216 | void EmitFindSMsb32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 217 | ctx.Add("BTFM.S {},{};", inst, value); | ||
| 218 | } | ||
| 219 | |||
| 220 | void EmitFindUMsb32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value) { | ||
| 221 | ctx.Add("BTFM.U {},{};", inst, value); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitSMin32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 225 | ctx.Add("MIN.S {},{},{};", inst, a, b); | ||
| 226 | } | ||
| 227 | |||
| 228 | void EmitUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 229 | ctx.Add("MIN.U {},{},{};", inst, a, b); | ||
| 230 | } | ||
| 231 | |||
| 232 | void EmitSMax32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 233 | ctx.Add("MAX.S {},{},{};", inst, a, b); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 a, ScalarU32 b) { | ||
| 237 | ctx.Add("MAX.U {},{},{};", inst, a, b); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitSClamp32(EmitContext& ctx, IR::Inst& inst, ScalarS32 value, ScalarS32 min, ScalarS32 max) { | ||
| 241 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 242 | ctx.Add("MIN.S RC.x,{},{};" | ||
| 243 | "MAX.S {}.x,RC.x,{};", | ||
| 244 | max, value, ret, min); | ||
| 245 | } | ||
| 246 | |||
| 247 | void EmitUClamp32(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 min, ScalarU32 max) { | ||
| 248 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 249 | ctx.Add("MIN.U RC.x,{},{};" | ||
| 250 | "MAX.U {}.x,RC.x,{};", | ||
| 251 | max, value, ret, min); | ||
| 252 | } | ||
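Both clamps compute max(min(max_value, value), min_value) in two instructions through the RC scratch register, which is equivalent to std::clamp whenever min <= max. A quick host-side self-check:

```cpp
#include <algorithm>
#include <cassert>

int main() {
    constexpr int lo = -3;
    constexpr int hi = 5;
    for (int v = -10; v <= 10; ++v) {
        assert(std::max(std::min(hi, v), lo) == std::clamp(v, lo, hi));
    }
}
```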
| 253 | |||
| 254 | void EmitSLessThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 255 | ctx.Add("SLT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 256 | } | ||
| 257 | |||
| 258 | void EmitULessThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 259 | ctx.Add("SLT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 260 | } | ||
| 261 | |||
| 262 | void EmitIEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 263 | ctx.Add("SEQ.S {}.x,{},{};", inst, lhs, rhs); | ||
| 264 | } | ||
| 265 | |||
| 266 | void EmitSLessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 267 | ctx.Add("SLE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 268 | } | ||
| 269 | |||
| 270 | void EmitULessThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 271 | ctx.Add("SLE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 275 | ctx.Add("SGT.S {}.x,{},{};", inst, lhs, rhs); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitUGreaterThan(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 279 | ctx.Add("SGT.U {}.x,{},{};", inst, lhs, rhs); | ||
| 280 | } | ||
| 281 | |||
| 282 | void EmitINotEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 283 | ctx.Add("SNE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 284 | } | ||
| 285 | |||
| 286 | void EmitSGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 lhs, ScalarS32 rhs) { | ||
| 287 | ctx.Add("SGE.S {}.x,{},{};", inst, lhs, rhs); | ||
| 288 | } | ||
| 289 | |||
| 290 | void EmitUGreaterThanEqual(EmitContext& ctx, IR::Inst& inst, ScalarU32 lhs, ScalarU32 rhs) { | ||
| 291 | ctx.Add("SGE.U {}.x,{},{};", inst, lhs, rhs); | ||
| 292 | } | ||
| 293 | |||
| 294 | } // namespace Shader::Backend::GLASM | ||
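Note the boolean convention behind the comparison emitters above: the integer set opcodes (SLT.S, SEQ.S, SGE.U, ...) write -1 (all bits set) for true and 0 for false, as the inline comments in emit_glasm_memory.cpp below spell out (`a = input_addr >= ssbo_addr ? -1 : 0`). That is what lets EmitLogicalAnd/Or/Xor be plain bitwise operations. A host-side check of the convention:

```cpp
#include <cassert>

int main() {
    constexpr int t = -1;  // GLASM-style true: all bits set
    constexpr int f = 0;   // GLASM-style false
    assert((t & t) == t && (t & f) == f);  // bitwise AND doubles as logical AND
    assert((t | f) == t && (f | f) == f);  // bitwise OR doubles as logical OR
    assert((t ^ t) == f && (t ^ f) == t);  // bitwise XOR doubles as logical XOR
}
```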
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_logical.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp new file mode 100644 index 000000000..af9fac7c1 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -0,0 +1,568 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/runtime_info.h" | ||
| 12 | |||
| 13 | namespace Shader::Backend::GLASM { | ||
| 14 | namespace { | ||
| 15 | void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 16 | std::string_view then_expr, std::string_view else_expr = {}) { | ||
| 17 | // Operate on a bindless SSBO; the expression is only executed when the offset | ||
| 18 | // passes the bounds check. Descriptor layout: address = c[binding].xy, | ||
| 19 | // length = c[binding].z | ||
| 20 | const u32 sb_binding{binding.U32()}; | ||
| 21 | ctx.Add("PK64.U DC,c[{}];" // pointer = address | ||
| 22 | "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) | ||
| 23 | "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset | ||
| 24 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length | ||
| 25 | sb_binding, offset, offset, sb_binding); | ||
| 26 | if (else_expr.empty()) { | ||
| 27 | ctx.Add("IF NE.x;{}ENDIF;", then_expr); | ||
| 28 | } else { | ||
| 29 | ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); | ||
| 30 | } | ||
| 31 | } | ||
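A host-side sketch of what the emitted sequence in StorageOp computes, with illustrative stand-in types: the SSBO's 64-bit base address and 32-bit length come from constant buffer `c[binding]`, and the guarded expression runs only when the offset falls within the length:

```cpp
#include <cstdint>
#include <cstdio>

struct SsboDescriptor {
    uint64_t address;  // c[binding].xy, repacked by PK64.U
    uint32_t length;   // c[binding].z
};

void BoundsCheckedAccess(const SsboDescriptor& ssbo, uint32_t offset) {
    const uint64_t pointer = ssbo.address + offset;  // ADD.U64 DC.x,DC.x,DC.z
    if (offset < ssbo.length) {                      // SLT.U.CC RC.x + IF NE.x
        std::printf("access at %#llx\n", static_cast<unsigned long long>(pointer));
    }  // an else_expr, when supplied, becomes the ELSE branch
}

int main() {
    BoundsCheckedAccess({0x1000, 64}, 16);  // in bounds: expression runs
    BoundsCheckedAccess({0x1000, 64}, 80);  // out of bounds: skipped
}
```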
| 32 | |||
| 33 | void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, | ||
| 34 | std::string_view else_expr = {}) { | ||
| 35 | const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; | ||
| 36 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 37 | if (!ctx.info.nvn_buffer_used[index]) { | ||
| 38 | continue; | ||
| 39 | } | ||
| 40 | const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; | ||
| 41 | ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr | ||
| 42 | "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32 | ||
| 43 | "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32 | ||
| 44 | "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size | ||
| 45 | "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 0 | ||
| 46 | "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 0 | ||
| 47 | "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b | ||
| 48 | "IF NE.x;" // if cond | ||
| 49 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr | ||
| 50 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, | ||
| 51 | address, address); | ||
| 52 | if (pointer_based) { | ||
| 53 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf | ||
| 54 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||
| 55 | "{}" | ||
| 56 | "ELSE;", | ||
| 57 | index, expr); | ||
| 58 | } else { | ||
| 59 | ctx.Add("CVT.U32.U64 RC.x,DC.x;" | ||
| 60 | "{},ssbo{}[RC.x];" | ||
| 61 | "ELSE;", | ||
| 62 | expr, index); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | if (!else_expr.empty()) { | ||
| 66 | ctx.Add("{}", else_expr); | ||
| 67 | } | ||
| 68 | const size_t num_used_buffers{ctx.info.nvn_buffer_used.count()}; | ||
| 69 | for (size_t index = 0; index < num_used_buffers; ++index) { | ||
| 70 | ctx.Add("ENDIF;"); | ||
| 71 | } | ||
| 72 | } | ||
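The ENDIF loop at the end of GlobalStorageOp is balanced by construction: each *used* NVN buffer opens exactly one IF (skipped slots emit nothing), so the chain is closed `nvn_buffer_used.count()` times. A stand-alone illustration of that invariant:

```cpp
#include <bitset>
#include <string>

#include <fmt/core.h>

int main() {
    const std::bitset<16> nvn_buffer_used{0b1011};  // three used buffer slots
    std::string out;
    for (size_t index = 0; index < nvn_buffer_used.size(); ++index) {
        if (!nvn_buffer_used[index]) {
            continue;  // unused slots open no IF
        }
        out += "IF NE.x;...;";  // one IF per used buffer
    }
    for (size_t i = 0; i < nvn_buffer_used.count(); ++i) {
        out += "ENDIF;";  // hence one ENDIF per used buffer
    }
    fmt::print("{}\n", out);
}
```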
| 73 | |||
| 74 | template <typename ValueType> | ||
| 75 | void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, | ||
| 76 | std::string_view size) { | ||
| 77 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 78 | ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); | ||
| 79 | } else { | ||
| 80 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 85 | std::string_view size) { | ||
| 86 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 87 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 88 | ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); | ||
| 89 | } else { | ||
| 90 | StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 91 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | template <typename ValueType> | ||
| 96 | void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { | ||
| 97 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 98 | GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); | ||
| 99 | } else { | ||
| 100 | GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { | ||
| 105 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 106 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 107 | GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); | ||
| 108 | } else { | ||
| 109 | GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 110 | fmt::format("MOV.S {},0;", ret)); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 114 | template <typename ValueType> | ||
| 115 | void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 116 | ValueType value, std::string_view operation, std::string_view size) { | ||
| 117 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 118 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 119 | ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), | ||
| 120 | offset); | ||
| 121 | } else { | ||
| 122 | StorageOp(ctx, binding, offset, | ||
| 123 | fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | } // Anonymous namespace | ||
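The Atom helper mirrors Write/Load: with host storage buffers it emits ATOMB directly on the bound SSBO; otherwise it routes an ATOM on the computed 64-bit pointer through StorageOp's bounds check. The two shapes it formats, with illustrative register names:

```cpp
#include <fmt/core.h>

int main() {
    // glasm_use_storage_buffers: ATOMB.<op>.<size> ret,value,ssboN[offset];
    fmt::print("{}\n", fmt::format("ATOMB.{}.{} {},{},ssbo{}[{}];",
                                   "ADD", "U32", "R0", "R1", 0, "R2"));
    // Fallback inside StorageOp's IF NE.x guard: ATOM on the DC.x pointer.
    fmt::print("{}\n",
               fmt::format("ATOM.{}.{} {},{},DC.x;", "ADD", "U32", "R0", "R1"));
}
```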
| 127 | |||
| 128 | void EmitLoadGlobalU8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 129 | GlobalLoad(ctx, inst, address, "U8"); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitLoadGlobalS8(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 133 | GlobalLoad(ctx, inst, address, "S8"); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitLoadGlobalU16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 137 | GlobalLoad(ctx, inst, address, "U16"); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitLoadGlobalS16(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 141 | GlobalLoad(ctx, inst, address, "S16"); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 145 | GlobalLoad(ctx, inst, address, "U32"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 149 | GlobalLoad(ctx, inst, address, "U32X2"); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, Register address) { | ||
| 153 | GlobalLoad(ctx, inst, address, "U32X4"); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitWriteGlobalU8(EmitContext& ctx, Register address, Register value) { | ||
| 157 | GlobalWrite(ctx, address, value, "U8"); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitWriteGlobalS8(EmitContext& ctx, Register address, Register value) { | ||
| 161 | GlobalWrite(ctx, address, value, "S8"); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitWriteGlobalU16(EmitContext& ctx, Register address, Register value) { | ||
| 165 | GlobalWrite(ctx, address, value, "U16"); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitWriteGlobalS16(EmitContext& ctx, Register address, Register value) { | ||
| 169 | GlobalWrite(ctx, address, value, "S16"); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitWriteGlobal32(EmitContext& ctx, Register address, ScalarU32 value) { | ||
| 173 | GlobalWrite(ctx, address, value, "U32"); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitWriteGlobal64(EmitContext& ctx, Register address, Register value) { | ||
| 177 | GlobalWrite(ctx, address, value, "U32X2"); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitWriteGlobal128(EmitContext& ctx, Register address, Register value) { | ||
| 181 | GlobalWrite(ctx, address, value, "U32X4"); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 185 | ScalarU32 offset) { | ||
| 186 | Load(ctx, inst, binding, offset, "U8"); | ||
| 187 | } | ||
| 188 | |||
| 189 | void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 190 | ScalarU32 offset) { | ||
| 191 | Load(ctx, inst, binding, offset, "S8"); | ||
| 192 | } | ||
| 193 | |||
| 194 | void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 195 | ScalarU32 offset) { | ||
| 196 | Load(ctx, inst, binding, offset, "U16"); | ||
| 197 | } | ||
| 198 | |||
| 199 | void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 200 | ScalarU32 offset) { | ||
| 201 | Load(ctx, inst, binding, offset, "S16"); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 205 | ScalarU32 offset) { | ||
| 206 | Load(ctx, inst, binding, offset, "U32"); | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 210 | ScalarU32 offset) { | ||
| 211 | Load(ctx, inst, binding, offset, "U32X2"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 215 | ScalarU32 offset) { | ||
| 216 | Load(ctx, inst, binding, offset, "U32X4"); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 220 | ScalarU32 value) { | ||
| 221 | Write(ctx, binding, offset, value, "U8"); | ||
| 222 | } | ||
| 223 | |||
| 224 | void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 225 | ScalarS32 value) { | ||
| 226 | Write(ctx, binding, offset, value, "S8"); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 230 | ScalarU32 value) { | ||
| 231 | Write(ctx, binding, offset, value, "U16"); | ||
| 232 | } | ||
| 233 | |||
| 234 | void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 235 | ScalarS32 value) { | ||
| 236 | Write(ctx, binding, offset, value, "S16"); | ||
| 237 | } | ||
| 238 | |||
| 239 | void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 240 | ScalarU32 value) { | ||
| 241 | Write(ctx, binding, offset, value, "U32"); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 245 | Register value) { | ||
| 246 | Write(ctx, binding, offset, value, "U32X2"); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 250 | Register value) { | ||
| 251 | Write(ctx, binding, offset, value, "U32X4"); | ||
| 252 | } | ||
| 253 | |||
| 254 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 255 | ScalarU32 value) { | ||
| 256 | ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 257 | } | ||
| 258 | |||
| 259 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 260 | ScalarS32 value) { | ||
| 261 | ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 265 | ScalarU32 value) { | ||
| 266 | ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 267 | } | ||
| 268 | |||
| 269 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 270 | ScalarS32 value) { | ||
| 271 | ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 272 | } | ||
| 273 | |||
| 274 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 275 | ScalarU32 value) { | ||
| 276 | ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 277 | } | ||
| 278 | |||
| 279 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 280 | ScalarU32 value) { | ||
| 281 | ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 282 | } | ||
| 283 | |||
| 284 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 285 | ScalarU32 value) { | ||
| 286 | ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 287 | } | ||
| 288 | |||
| 289 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 290 | ScalarU32 value) { | ||
| 291 | ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 292 | } | ||
| 293 | |||
| 294 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 295 | ScalarU32 value) { | ||
| 296 | ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 297 | } | ||
| 298 | |||
| 299 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 300 | ScalarU32 value) { | ||
| 301 | ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 302 | } | ||
| 303 | |||
| 304 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 305 | ScalarU32 value) { | ||
| 306 | ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 307 | } | ||
| 308 | |||
| 309 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 310 | Register value) { | ||
| 311 | ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 312 | } | ||
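The IWRAP/DWRAP suffixes above lower the IR's Inc/Dec atomics to wrapping increment/decrement. Assuming these follow the usual CUDA/OpenGL wrap rules (an assumption; the NV_gpu_program5 spec is authoritative), the semantics are as sketched below:

```cpp
#include <cassert>
#include <cstdint>

// Assumed semantics of ATOMS.IWRAP.U32: wrap to 0 once old reaches the operand.
uint32_t WrapInc(uint32_t old, uint32_t value) {
    return (old >= value) ? 0 : old + 1;
}

// Assumed semantics of ATOMS.DWRAP.U32: wrap to the operand at 0 or overshoot.
uint32_t WrapDec(uint32_t old, uint32_t value) {
    return (old == 0 || old > value) ? value : old - 1;
}

int main() {
    assert(WrapInc(3, 4) == 4 && WrapInc(4, 4) == 0);
    assert(WrapDec(0, 4) == 4 && WrapDec(4, 4) == 3);
}
```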
| 313 | |||
| 314 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 315 | ScalarU32 offset, ScalarU32 value) { | ||
| 316 | Atom(ctx, inst, binding, offset, value, "ADD", "U32"); | ||
| 317 | } | ||
| 318 | |||
| 319 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 320 | ScalarU32 offset, ScalarS32 value) { | ||
| 321 | Atom(ctx, inst, binding, offset, value, "MIN", "S32"); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 325 | ScalarU32 offset, ScalarU32 value) { | ||
| 326 | Atom(ctx, inst, binding, offset, value, "MIN", "U32"); | ||
| 327 | } | ||
| 328 | |||
| 329 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 330 | ScalarU32 offset, ScalarS32 value) { | ||
| 331 | Atom(ctx, inst, binding, offset, value, "MAX", "S32"); | ||
| 332 | } | ||
| 333 | |||
| 334 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 335 | ScalarU32 offset, ScalarU32 value) { | ||
| 336 | Atom(ctx, inst, binding, offset, value, "MAX", "U32"); | ||
| 337 | } | ||
| 338 | |||
| 339 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 340 | ScalarU32 offset, ScalarU32 value) { | ||
| 341 | Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); | ||
| 342 | } | ||
| 343 | |||
| 344 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 345 | ScalarU32 offset, ScalarU32 value) { | ||
| 346 | Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); | ||
| 347 | } | ||
| 348 | |||
| 349 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 350 | ScalarU32 offset, ScalarU32 value) { | ||
| 351 | Atom(ctx, inst, binding, offset, value, "AND", "U32"); | ||
| 352 | } | ||
| 353 | |||
| 354 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 355 | ScalarU32 offset, ScalarU32 value) { | ||
| 356 | Atom(ctx, inst, binding, offset, value, "OR", "U32"); | ||
| 357 | } | ||
| 358 | |||
| 359 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 360 | ScalarU32 offset, ScalarU32 value) { | ||
| 361 | Atom(ctx, inst, binding, offset, value, "XOR", "U32"); | ||
| 362 | } | ||
| 363 | |||
| 364 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 365 | ScalarU32 offset, ScalarU32 value) { | ||
| 366 | Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 370 | ScalarU32 offset, Register value) { | ||
| 371 | Atom(ctx, inst, binding, offset, value, "ADD", "U64"); | ||
| 372 | } | ||
| 373 | |||
| 374 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 375 | ScalarU32 offset, Register value) { | ||
| 376 | Atom(ctx, inst, binding, offset, value, "MIN", "S64"); | ||
| 377 | } | ||
| 378 | |||
| 379 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 380 | ScalarU32 offset, Register value) { | ||
| 381 | Atom(ctx, inst, binding, offset, value, "MIN", "U64"); | ||
| 382 | } | ||
| 383 | |||
| 384 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 385 | ScalarU32 offset, Register value) { | ||
| 386 | Atom(ctx, inst, binding, offset, value, "MAX", "S64"); | ||
| 387 | } | ||
| 388 | |||
| 389 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 390 | ScalarU32 offset, Register value) { | ||
| 391 | Atom(ctx, inst, binding, offset, value, "MAX", "U64"); | ||
| 392 | } | ||
| 393 | |||
| 394 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 395 | ScalarU32 offset, Register value) { | ||
| 396 | Atom(ctx, inst, binding, offset, value, "AND", "U64"); | ||
| 397 | } | ||
| 398 | |||
| 399 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 400 | ScalarU32 offset, Register value) { | ||
| 401 | Atom(ctx, inst, binding, offset, value, "OR", "U64"); | ||
| 402 | } | ||
| 403 | |||
| 404 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 405 | ScalarU32 offset, Register value) { | ||
| 406 | Atom(ctx, inst, binding, offset, value, "XOR", "U64"); | ||
| 407 | } | ||
| 408 | |||
| 409 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 410 | ScalarU32 offset, Register value) { | ||
| 411 | Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); | ||
| 412 | } | ||
| 413 | |||
| 414 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 415 | ScalarU32 offset, ScalarF32 value) { | ||
| 416 | Atom(ctx, inst, binding, offset, value, "ADD", "F32"); | ||
| 417 | } | ||
| 418 | |||
| 419 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 420 | ScalarU32 offset, Register value) { | ||
| 421 | Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); | ||
| 422 | } | ||
| 423 | |||
| 424 | void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 425 | [[maybe_unused]] const IR::Value& binding, | ||
| 426 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 427 | throw NotImplementedException("GLASM instruction"); | ||
| 428 | } | ||
| 429 | |||
| 430 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 431 | ScalarU32 offset, Register value) { | ||
| 432 | Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); | ||
| 433 | } | ||
| 434 | |||
| 435 | void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 436 | [[maybe_unused]] const IR::Value& binding, | ||
| 437 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 438 | throw NotImplementedException("GLASM instruction"); | ||
| 439 | } | ||
| 440 | |||
| 441 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 442 | ScalarU32 offset, Register value) { | ||
| 443 | Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); | ||
| 444 | } | ||
| 445 | |||
| 446 | void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 447 | [[maybe_unused]] const IR::Value& binding, | ||
| 448 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 449 | throw NotImplementedException("GLASM instruction"); | ||
| 450 | } | ||
| 451 | |||
| 452 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 453 | throw NotImplementedException("GLASM instruction"); | ||
| 454 | } | ||
| 455 | |||
| 456 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 457 | throw NotImplementedException("GLASM instruction"); | ||
| 458 | } | ||
| 459 | |||
| 460 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 461 | throw NotImplementedException("GLASM instruction"); | ||
| 462 | } | ||
| 463 | |||
| 464 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 465 | throw NotImplementedException("GLASM instruction"); | ||
| 466 | } | ||
| 467 | |||
| 468 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 469 | throw NotImplementedException("GLASM instruction"); | ||
| 470 | } | ||
| 471 | |||
| 472 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 473 | throw NotImplementedException("GLASM instruction"); | ||
| 474 | } | ||
| 475 | |||
| 476 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 477 | throw NotImplementedException("GLASM instruction"); | ||
| 478 | } | ||
| 479 | |||
| 480 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 481 | throw NotImplementedException("GLASM instruction"); | ||
| 482 | } | ||
| 483 | |||
| 484 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 485 | throw NotImplementedException("GLASM instruction"); | ||
| 486 | } | ||
| 487 | |||
| 488 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 489 | throw NotImplementedException("GLASM instruction"); | ||
| 490 | } | ||
| 491 | |||
| 492 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 493 | throw NotImplementedException("GLASM instruction"); | ||
| 494 | } | ||
| 495 | |||
| 496 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 497 | throw NotImplementedException("GLASM instruction"); | ||
| 498 | } | ||
| 499 | |||
| 500 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 501 | throw NotImplementedException("GLASM instruction"); | ||
| 502 | } | ||
| 503 | |||
| 504 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 505 | throw NotImplementedException("GLASM instruction"); | ||
| 506 | } | ||
| 507 | |||
| 508 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 509 | throw NotImplementedException("GLASM instruction"); | ||
| 510 | } | ||
| 511 | |||
| 512 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 513 | throw NotImplementedException("GLASM instruction"); | ||
| 514 | } | ||
| 515 | |||
| 516 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 517 | throw NotImplementedException("GLASM instruction"); | ||
| 518 | } | ||
| 519 | |||
| 520 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 521 | throw NotImplementedException("GLASM instruction"); | ||
| 522 | } | ||
| 523 | |||
| 524 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 525 | throw NotImplementedException("GLASM instruction"); | ||
| 526 | } | ||
| 527 | |||
| 528 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 529 | throw NotImplementedException("GLASM instruction"); | ||
| 530 | } | ||
| 531 | |||
| 532 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 533 | throw NotImplementedException("GLASM instruction"); | ||
| 534 | } | ||
| 535 | |||
| 536 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 537 | throw NotImplementedException("GLASM instruction"); | ||
| 538 | } | ||
| 539 | |||
| 540 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 541 | throw NotImplementedException("GLASM instruction"); | ||
| 542 | } | ||
| 543 | |||
| 544 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 545 | throw NotImplementedException("GLASM instruction"); | ||
| 546 | } | ||
| 547 | |||
| 548 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 549 | throw NotImplementedException("GLASM instruction"); | ||
| 550 | } | ||
| 551 | |||
| 552 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 553 | throw NotImplementedException("GLASM instruction"); | ||
| 554 | } | ||
| 555 | |||
| 556 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 557 | throw NotImplementedException("GLASM instruction"); | ||
| 558 | } | ||
| 559 | |||
| 560 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 561 | throw NotImplementedException("GLASM instruction"); | ||
| 562 | } | ||
| 563 | |||
| 564 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 565 | throw NotImplementedException("GLASM instruction"); | ||
| 566 | } | ||
| 567 | |||
| 568 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp new file mode 100644 index 000000000..ff64c6924 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | |||
| 12 | #ifdef _MSC_VER | ||
| 13 | #pragma warning(disable : 4100) | ||
| 14 | #endif | ||
| 15 | |||
| 16 | namespace Shader::Backend::GLASM { | ||
| 17 | |||
| 18 | #define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__) | ||
| 19 | |||
| 20 | static void DefinePhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 21 | switch (phi.Arg(0).Type()) { | ||
| 22 | case IR::Type::U1: | ||
| 23 | case IR::Type::U32: | ||
| 24 | case IR::Type::F32: | ||
| 25 | ctx.reg_alloc.Define(phi); | ||
| 26 | break; | ||
| 27 | case IR::Type::U64: | ||
| 28 | case IR::Type::F64: | ||
| 29 | ctx.reg_alloc.LongDefine(phi); | ||
| 30 | break; | ||
| 31 | default: | ||
| 32 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||
| 37 | const size_t num_args{phi.NumArgs()}; | ||
| 38 | for (size_t i = 0; i < num_args; ++i) { | ||
| 39 | ctx.reg_alloc.Consume(phi.Arg(i)); | ||
| 40 | } | ||
| 41 | if (!phi.Definition<Id>().is_valid) { | ||
| 42 | // The phi node was not forward-defined | ||
| 43 | DefinePhi(ctx, phi); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitVoid(EmitContext&) {} | ||
| 48 | |||
| 49 | void EmitReference(EmitContext& ctx, const IR::Value& value) { | ||
| 50 | ctx.reg_alloc.Consume(value); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& value) { | ||
| 54 | IR::Inst& phi{RegAlloc::AliasInst(*phi_value.Inst())}; | ||
| 55 | if (!phi.Definition<Id>().is_valid) { | ||
| 56 | // The phi node was not forward-defined | ||
| 57 | DefinePhi(ctx, phi); | ||
| 58 | } | ||
| 59 | const Register phi_reg{ctx.reg_alloc.Consume(IR::Value{&phi})}; | ||
| 60 | const Value eval_value{ctx.reg_alloc.Consume(value)}; | ||
| 61 | |||
| 62 | if (phi_reg == eval_value) { | ||
| 63 | return; | ||
| 64 | } | ||
| 65 | switch (phi.Flags<IR::Type>()) { | ||
| 66 | case IR::Type::U1: | ||
| 67 | case IR::Type::U32: | ||
| 68 | case IR::Type::F32: | ||
| 69 | ctx.Add("MOV.S {}.x,{};", phi_reg, ScalarS32{eval_value}); | ||
| 70 | break; | ||
| 71 | case IR::Type::U64: | ||
| 72 | case IR::Type::F64: | ||
| 73 | ctx.Add("MOV.U64 {}.x,{};", phi_reg, ScalarRegister{eval_value}); | ||
| 74 | break; | ||
| 75 | default: | ||
| 76 | throw NotImplementedException("Phi node type {}", phi.Type()); | ||
| 77 | } | ||
| 78 | } | ||
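EmitPhi and EmitPhiMove together deconstruct SSA form: the phi owns one register, defined lazily by whichever side is reached first, and every predecessor block copies its incoming value into that register before the branch closes. A toy illustration of the resulting control flow (stand-in types, not the real IR):

```cpp
#include <cstdio>

int main() {
    int phi_reg;             // the one register Define/LongDefine reserves for the phi
    const bool take_then = true;
    if (take_then) {
        phi_reg = 1;         // PhiMove at the end of the "then" predecessor
    } else {
        phi_reg = 2;         // PhiMove at the end of the "else" predecessor
    }
    std::printf("%d\n", phi_reg);  // later uses of the phi just read phi_reg
}
```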
| 79 | |||
| 80 | void EmitJoin(EmitContext& ctx) { | ||
| 81 | NotImplemented(); | ||
| 82 | } | ||
| 83 | |||
| 84 | void EmitDemoteToHelperInvocation(EmitContext& ctx) { | ||
| 85 | ctx.Add("KIL TR.x;"); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitBarrier(EmitContext& ctx) { | ||
| 89 | ctx.Add("BAR;"); | ||
| 90 | } | ||
| 91 | |||
| 92 | void EmitWorkgroupMemoryBarrier(EmitContext& ctx) { | ||
| 93 | ctx.Add("MEMBAR.CTA;"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitDeviceMemoryBarrier(EmitContext& ctx) { | ||
| 97 | ctx.Add("MEMBAR;"); | ||
| 98 | } | ||
| 99 | |||
| 100 | void EmitPrologue(EmitContext& ctx) { | ||
| 101 | // TODO | ||
| 102 | } | ||
| 103 | |||
| 104 | void EmitEpilogue(EmitContext& ctx) { | ||
| 105 | // TODO | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitEmitVertex(EmitContext& ctx, ScalarS32 stream) { | ||
| 109 | if (stream.type == Type::U32 && stream.imm_u32 == 0) { | ||
| 110 | ctx.Add("EMIT;"); | ||
| 111 | } else { | ||
| 112 | ctx.Add("EMITS {};", stream); | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||
| 117 | if (!stream.IsImmediate()) { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Stream is not immediate"); | ||
| 119 | } | ||
| 120 | ctx.reg_alloc.Consume(stream); | ||
| 121 | ctx.Add("ENDPRIM;"); | ||
| 122 | } | ||
| 123 | |||
| 124 | void EmitGetRegister(EmitContext& ctx) { | ||
| 125 | NotImplemented(); | ||
| 126 | } | ||
| 127 | |||
| 128 | void EmitSetRegister(EmitContext& ctx) { | ||
| 129 | NotImplemented(); | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitGetPred(EmitContext& ctx) { | ||
| 133 | NotImplemented(); | ||
| 134 | } | ||
| 135 | |||
| 136 | void EmitSetPred(EmitContext& ctx) { | ||
| 137 | NotImplemented(); | ||
| 138 | } | ||
| 139 | |||
| 140 | void EmitSetGotoVariable(EmitContext& ctx) { | ||
| 141 | NotImplemented(); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitGetGotoVariable(EmitContext& ctx) { | ||
| 145 | NotImplemented(); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitSetIndirectBranchVariable(EmitContext& ctx) { | ||
| 149 | NotImplemented(); | ||
| 150 | } | ||
| 151 | |||
| 152 | void EmitGetIndirectBranchVariable(EmitContext& ctx) { | ||
| 153 | NotImplemented(); | ||
| 154 | } | ||
| 155 | |||
| 156 | void EmitGetZFlag(EmitContext& ctx) { | ||
| 157 | NotImplemented(); | ||
| 158 | } | ||
| 159 | |||
| 160 | void EmitGetSFlag(EmitContext& ctx) { | ||
| 161 | NotImplemented(); | ||
| 162 | } | ||
| 163 | |||
| 164 | void EmitGetCFlag(EmitContext& ctx) { | ||
| 165 | NotImplemented(); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitGetOFlag(EmitContext& ctx) { | ||
| 169 | NotImplemented(); | ||
| 170 | } | ||
| 171 | |||
| 172 | void EmitSetZFlag(EmitContext& ctx) { | ||
| 173 | NotImplemented(); | ||
| 174 | } | ||
| 175 | |||
| 176 | void EmitSetSFlag(EmitContext& ctx) { | ||
| 177 | NotImplemented(); | ||
| 178 | } | ||
| 179 | |||
| 180 | void EmitSetCFlag(EmitContext& ctx) { | ||
| 181 | NotImplemented(); | ||
| 182 | } | ||
| 183 | |||
| 184 | void EmitSetOFlag(EmitContext& ctx) { | ||
| 185 | NotImplemented(); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitWorkgroupId(EmitContext& ctx, IR::Inst& inst) { | ||
| 189 | ctx.Add("MOV.S {},invocation.groupid;", inst); | ||
| 190 | } | ||
| 191 | |||
| 192 | void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 193 | ctx.Add("MOV.S {},invocation.localid;", inst); | ||
| 194 | } | ||
| 195 | |||
| 196 | void EmitInvocationId(EmitContext& ctx, IR::Inst& inst) { | ||
| 197 | ctx.Add("MOV.S {}.x,primitive_invocation.x;", inst); | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitSampleId(EmitContext& ctx, IR::Inst& inst) { | ||
| 201 | ctx.Add("MOV.S {}.x,fragment.sampleid.x;", inst); | ||
| 202 | } | ||
| 203 | |||
| 204 | void EmitIsHelperInvocation(EmitContext& ctx, IR::Inst& inst) { | ||
| 205 | ctx.Add("MOV.S {}.x,fragment.helperthread.x;", inst); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitYDirection(EmitContext& ctx, IR::Inst& inst) { | ||
| 209 | ctx.uses_y_direction = true; | ||
| 210 | ctx.Add("MOV.F {}.x,y_direction[0].w;", inst); | ||
| 211 | } | ||
| 212 | |||
| 213 | void EmitUndefU1(EmitContext& ctx, IR::Inst& inst) { | ||
| 214 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 215 | } | ||
| 216 | |||
| 217 | void EmitUndefU8(EmitContext& ctx, IR::Inst& inst) { | ||
| 218 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 219 | } | ||
| 220 | |||
| 221 | void EmitUndefU16(EmitContext& ctx, IR::Inst& inst) { | ||
| 222 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitUndefU32(EmitContext& ctx, IR::Inst& inst) { | ||
| 226 | ctx.Add("MOV.S {}.x,0;", inst); | ||
| 227 | } | ||
| 228 | |||
| 229 | void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) { | ||
| 230 | ctx.LongAdd("MOV.S64 {}.x,0;", inst); | ||
| 231 | } | ||
| 232 | |||
| 233 | void EmitGetZeroFromOp(EmitContext& ctx) { | ||
| 234 | NotImplemented(); | ||
| 235 | } | ||
| 236 | |||
| 237 | void EmitGetSignFromOp(EmitContext& ctx) { | ||
| 238 | NotImplemented(); | ||
| 239 | } | ||
| 240 | |||
| 241 | void EmitGetCarryFromOp(EmitContext& ctx) { | ||
| 242 | NotImplemented(); | ||
| 243 | } | ||
| 244 | |||
| 245 | void EmitGetOverflowFromOp(EmitContext& ctx) { | ||
| 246 | NotImplemented(); | ||
| 247 | } | ||
| 248 | |||
| 249 | void EmitGetSparseFromOp(EmitContext& ctx) { | ||
| 250 | NotImplemented(); | ||
| 251 | } | ||
| 252 | |||
| 253 | void EmitGetInBoundsFromOp(EmitContext& ctx) { | ||
| 254 | NotImplemented(); | ||
| 255 | } | ||
| 256 | |||
| 257 | void EmitLogicalOr(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 258 | ctx.Add("OR.S {},{},{};", inst, a, b); | ||
| 259 | } | ||
| 260 | |||
| 261 | void EmitLogicalAnd(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 262 | ctx.Add("AND.S {},{},{};", inst, a, b); | ||
| 263 | } | ||
| 264 | |||
| 265 | void EmitLogicalXor(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { | ||
| 266 | ctx.Add("XOR.S {},{},{};", inst, a, b); | ||
| 267 | } | ||
| 268 | |||
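| | // SEQ.S yields an all-ones mask when the operand equals zero, implementing NOT for 0/-1 booleans. | ||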
| 269 | void EmitLogicalNot(EmitContext& ctx, IR::Inst& inst, ScalarS32 value) { | ||
| 270 | ctx.Add("SEQ.S {},{},0;", inst, value); | ||
| 271 | } | ||
| 272 | |||
| 273 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp new file mode 100644 index 000000000..68fff613c --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_select.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitSelectU1(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 13 | ScalarS32 false_value) { | ||
| 14 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 15 | } | ||
| 16 | |||
| 17 | void EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 18 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 19 | throw NotImplementedException("GLASM instruction"); | ||
| 20 | } | ||
| 21 | |||
| 22 | void EmitSelectU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 23 | [[maybe_unused]] ScalarS32 true_value, [[maybe_unused]] ScalarS32 false_value) { | ||
| 24 | throw NotImplementedException("GLASM instruction"); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitSelectU32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 28 | ScalarS32 false_value) { | ||
| 29 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSelectU64(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, Register true_value, | ||
| 33 | Register false_value) { | ||
| 34 | ctx.reg_alloc.InvalidateConditionCodes(); | ||
| 35 | const Register ret{ctx.reg_alloc.LongDefine(inst)}; | ||
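| | // MOV.S.CC loads the condition into RC's condition code, then the 64-bit moves are | ||
| | // predicated on it; when ret aliases one of the inputs a single conditional move suffices. | ||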
| 36 | if (ret == true_value) { | ||
| 37 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 38 | "MOV.U64 {}.x(EQ.x),{};", | ||
| 39 | cond, ret, false_value); | ||
| 40 | } else if (ret == false_value) { | ||
| 41 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 42 | "MOV.U64 {}.x(NE.x),{};", | ||
| 43 | cond, ret, true_value); | ||
| 44 | } else { | ||
| 45 | ctx.Add("MOV.S.CC RC.x,{};" | ||
| 46 | "MOV.U64 {}.x,{};" | ||
| 47 | "MOV.U64 {}.x(NE.x),{};", | ||
| 48 | cond, ret, false_value, ret, true_value); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | void EmitSelectF16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 53 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 54 | throw NotImplementedException("GLASM instruction"); | ||
| 55 | } | ||
| 56 | |||
| 57 | void EmitSelectF32(EmitContext& ctx, IR::Inst& inst, ScalarS32 cond, ScalarS32 true_value, | ||
| 58 | ScalarS32 false_value) { | ||
| 59 | ctx.Add("CMP.S {},{},{},{};", inst, cond, true_value, false_value); | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitSelectF64([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] ScalarS32 cond, | ||
| 63 | [[maybe_unused]] Register true_value, [[maybe_unused]] Register false_value) { | ||
| 64 | throw NotImplementedException("GLASM instruction"); | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp new file mode 100644 index 000000000..c1498f449 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_shared_memory.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | |||
| 2 | // Copyright 2021 yuzu Emulator Project | ||
| 3 | // Licensed under GPLv2 or any later version | ||
| 4 | // Refer to the license.txt file included. | ||
| 5 | |||
| 6 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 7 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | void EmitLoadSharedU8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 12 | ctx.Add("LDS.U8 {},shared_mem[{}];", inst, offset); | ||
| 13 | } | ||
| 14 | |||
| 15 | void EmitLoadSharedS8(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 16 | ctx.Add("LDS.S8 {},shared_mem[{}];", inst, offset); | ||
| 17 | } | ||
| 18 | |||
| 19 | void EmitLoadSharedU16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 20 | ctx.Add("LDS.U16 {},shared_mem[{}];", inst, offset); | ||
| 21 | } | ||
| 22 | |||
| 23 | void EmitLoadSharedS16(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 24 | ctx.Add("LDS.S16 {},shared_mem[{}];", inst, offset); | ||
| 25 | } | ||
| 26 | |||
| 27 | void EmitLoadSharedU32(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 28 | ctx.Add("LDS.U32 {},shared_mem[{}];", inst, offset); | ||
| 29 | } | ||
| 30 | |||
| 31 | void EmitLoadSharedU64(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 32 | ctx.Add("LDS.U32X2 {},shared_mem[{}];", inst, offset); | ||
| 33 | } | ||
| 34 | |||
| 35 | void EmitLoadSharedU128(EmitContext& ctx, IR::Inst& inst, ScalarU32 offset) { | ||
| 36 | ctx.Add("LDS.U32X4 {},shared_mem[{}];", inst, offset); | ||
| 37 | } | ||
| 38 | |||
| 39 | void EmitWriteSharedU8(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 40 | ctx.Add("STS.U8 {},shared_mem[{}];", value, offset); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitWriteSharedU16(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 44 | ctx.Add("STS.U16 {},shared_mem[{}];", value, offset); | ||
| 45 | } | ||
| 46 | |||
| 47 | void EmitWriteSharedU32(EmitContext& ctx, ScalarU32 offset, ScalarU32 value) { | ||
| 48 | ctx.Add("STS.U32 {},shared_mem[{}];", value, offset); | ||
| 49 | } | ||
| 50 | |||
| 51 | void EmitWriteSharedU64(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 52 | ctx.Add("STS.U32X2 {},shared_mem[{}];", value, offset); | ||
| 53 | } | ||
| 54 | |||
| 55 | void EmitWriteSharedU128(EmitContext& ctx, ScalarU32 offset, Register value) { | ||
| 56 | ctx.Add("STS.U32X4 {},shared_mem[{}];", value, offset); | ||
| 57 | } | ||
| 58 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_special.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_undefined.cpp | |||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp new file mode 100644 index 000000000..544d475b4 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp | |||
| @@ -0,0 +1,150 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | |||
| 10 | namespace Shader::Backend::GLASM { | ||
| 11 | |||
| 12 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | ||
| 13 | ctx.Add("MOV.S {}.x,{}.threadid;", inst, ctx.stage_name); | ||
| 14 | } | ||
| 15 | |||
| 16 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 17 | ctx.Add("TGALL.S {}.x,{};", inst, pred); | ||
| 18 | } | ||
| 19 | |||
| 20 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 21 | ctx.Add("TGANY.S {}.x,{};", inst, pred); | ||
| 22 | } | ||
| 23 | |||
| 24 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 25 | ctx.Add("TGEQ.S {}.x,{};", inst, pred); | ||
| 26 | } | ||
| 27 | |||
| 28 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, ScalarS32 pred) { | ||
| 29 | ctx.Add("TGBALLOT {}.x,{};", inst, pred); | ||
| 30 | } | ||
| 31 | |||
| 32 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 33 | ctx.Add("MOV.U {},{}.threadeqmask;", inst, ctx.stage_name); | ||
| 34 | } | ||
| 35 | |||
| 36 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 37 | ctx.Add("MOV.U {},{}.threadltmask;", inst, ctx.stage_name); | ||
| 38 | } | ||
| 39 | |||
| 40 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 41 | ctx.Add("MOV.U {},{}.threadlemask;", inst, ctx.stage_name); | ||
| 42 | } | ||
| 43 | |||
| 44 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 45 | ctx.Add("MOV.U {},{}.threadgtmask;", inst, ctx.stage_name); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | ||
| 49 | ctx.Add("MOV.U {},{}.threadgemask;", inst, ctx.stage_name); | ||
| 50 | } | ||
| 51 | |||
| 52 | static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 53 | const IR::Value& clamp, const IR::Value& segmentation_mask, | ||
| 54 | std::string_view op) { | ||
| 55 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
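| | // The in-bounds pseudo-op is folded into the shuffle and defined alongside the result below. | ||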
| 56 | if (in_bounds) { | ||
| 57 | in_bounds->Invalidate(); | ||
| 58 | } | ||
| 59 | std::string mask; | ||
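| | // The combined mask packs clamp in the low bits and the segmentation mask at bit 8 | ||
| | // (5 bits wide, per the BFI below); fold immediates now, otherwise build the mask at runtime. | ||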
| 60 | if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { | ||
| 61 | mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); | ||
| 62 | } else { | ||
| 63 | mask = "RC"; | ||
| 64 | ctx.Add("BFI.U RC.x,{{5,8,0,0}},{},{};", | ||
| 65 | ScalarU32{ctx.reg_alloc.Consume(segmentation_mask)}, | ||
| 66 | ScalarU32{ctx.reg_alloc.Consume(clamp)}); | ||
| 67 | } | ||
| 68 | const Register value_ret{ctx.reg_alloc.Define(inst)}; | ||
| 69 | if (in_bounds) { | ||
| 70 | const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; | ||
| 71 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 72 | "MOV.U {}.x,{}.y;", | ||
| 73 | op, bounds_ret, value, index, mask, value_ret, bounds_ret); | ||
| 74 | } else { | ||
| 75 | ctx.Add("SHF{}.U {},{},{},{};" | ||
| 76 | "MOV.U {}.x,{}.y;", | ||
| 77 | op, value_ret, value, index, mask, value_ret, value_ret); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | |||
| 81 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 82 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 83 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "IDX"); | ||
| 84 | } | ||
| 85 | |||
| 86 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 87 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 88 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "UP"); | ||
| 89 | } | ||
| 90 | |||
| 91 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 92 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 93 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "DOWN"); | ||
| 94 | } | ||
| 95 | |||
| 96 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||
| 97 | const IR::Value& clamp, const IR::Value& segmentation_mask) { | ||
| 98 | Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); | ||
| 99 | } | ||
| 100 | |||
| 101 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||
| 102 | ScalarU32 swizzle) { | ||
| 103 | const auto ret{ctx.reg_alloc.Define(inst)}; | ||
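| | // Extract this lane's 2-bit swizzle code from the packed swizzle ((threadid & 3) selects | ||
| | // the bit pair) and blend op_a/op_b with the FSWZA/FSWZB coefficient tables. | ||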
| 104 | ctx.Add("AND.U RC.z,{}.threadid,3;" | ||
| 105 | "SHL.U RC.z,RC.z,1;" | ||
| 106 | "SHR.U RC.z,{},RC.z;" | ||
| 107 | "AND.U RC.z,RC.z,3;" | ||
| 108 | "MUL.F RC.x,{},FSWZA[RC.z];" | ||
| 109 | "MUL.F RC.y,{},FSWZB[RC.z];" | ||
| 110 | "ADD.F {}.x,RC.x,RC.y;", | ||
| 111 | ctx.stage_name, swizzle, op_a, op_b, ret); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 115 | if (ctx.profile.support_derivative_control) { | ||
| 116 | ctx.Add("DDX.FINE {}.x,{};", inst, p); | ||
| 117 | } else { | ||
| 118 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 119 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 124 | if (ctx.profile.support_derivative_control) { | ||
| 125 | ctx.Add("DDY.FINE {}.x,{};", inst, p); | ||
| 126 | } else { | ||
| 127 | LOG_WARNING(Shader_GLASM, "Fine derivatives not supported by device"); | ||
| 128 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 133 | if (ctx.profile.support_derivative_control) { | ||
| 134 | ctx.Add("DDX.COARSE {}.x,{};", inst, p); | ||
| 135 | } else { | ||
| 136 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 137 | ctx.Add("DDX {}.x,{};", inst, p); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||
| 142 | if (ctx.profile.support_derivative_control) { | ||
| 143 | ctx.Add("DDY.COARSE {}.x,{};", inst, p); | ||
| 144 | } else { | ||
| 145 | LOG_WARNING(Shader_GLASM, "Coarse derivatives not supported by device"); | ||
| 146 | ctx.Add("DDY {}.x,{};", inst, p); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 150 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp new file mode 100644 index 000000000..4c046db6e --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp | |||
| @@ -0,0 +1,186 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 10 | #include "shader_recompiler/backend/glasm/reg_alloc.h" | ||
| 11 | #include "shader_recompiler/exception.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 13 | |||
| 14 | namespace Shader::Backend::GLASM { | ||
| 15 | |||
| 16 | Register RegAlloc::Define(IR::Inst& inst) { | ||
| 17 | return Define(inst, false); | ||
| 18 | } | ||
| 19 | |||
| 20 | Register RegAlloc::LongDefine(IR::Inst& inst) { | ||
| 21 | return Define(inst, true); | ||
| 22 | } | ||
| 23 | |||
| 24 | Value RegAlloc::Peek(const IR::Value& value) { | ||
| 25 | if (value.IsImmediate()) { | ||
| 26 | return MakeImm(value); | ||
| 27 | } else { | ||
| 28 | return PeekInst(*value.Inst()); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | Value RegAlloc::Consume(const IR::Value& value) { | ||
| 33 | if (value.IsImmediate()) { | ||
| 34 | return MakeImm(value); | ||
| 35 | } else { | ||
| 36 | return ConsumeInst(*value.Inst()); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | void RegAlloc::Unref(IR::Inst& inst) { | ||
| 41 | IR::Inst& value_inst{AliasInst(inst)}; | ||
| 42 | value_inst.DestructiveRemoveUsage(); | ||
| 43 | if (!value_inst.HasUses()) { | ||
| 44 | Free(value_inst.Definition<Id>()); | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | Register RegAlloc::AllocReg() { | ||
| 49 | Register ret; | ||
| 50 | ret.type = Type::Register; | ||
| 51 | ret.id = Alloc(false); | ||
| 52 | return ret; | ||
| 53 | } | ||
| 54 | |||
| 55 | Register RegAlloc::AllocLongReg() { | ||
| 56 | Register ret; | ||
| 57 | ret.type = Type::Register; | ||
| 58 | ret.id = Alloc(true); | ||
| 59 | return ret; | ||
| 60 | } | ||
| 61 | |||
| 62 | void RegAlloc::FreeReg(Register reg) { | ||
| 63 | Free(reg.id); | ||
| 64 | } | ||
| 65 | |||
| 66 | Value RegAlloc::MakeImm(const IR::Value& value) { | ||
| 67 | Value ret; | ||
| 68 | switch (value.Type()) { | ||
| 69 | case IR::Type::Void: | ||
| 70 | ret.type = Type::Void; | ||
| 71 | break; | ||
| 72 | case IR::Type::U1: | ||
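| | // Booleans widen to an all-ones mask so true behaves as -1 in signed comparisons and selects. | ||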
| 73 | ret.type = Type::U32; | ||
| 74 | ret.imm_u32 = value.U1() ? 0xffffffff : 0; | ||
| 75 | break; | ||
| 76 | case IR::Type::U32: | ||
| 77 | ret.type = Type::U32; | ||
| 78 | ret.imm_u32 = value.U32(); | ||
| 79 | break; | ||
| 80 | case IR::Type::F32: | ||
| 81 | ret.type = Type::U32; | ||
| 82 | ret.imm_u32 = Common::BitCast<u32>(value.F32()); | ||
| 83 | break; | ||
| 84 | case IR::Type::U64: | ||
| 85 | ret.type = Type::U64; | ||
| 86 | ret.imm_u64 = value.U64(); | ||
| 87 | break; | ||
| 88 | case IR::Type::F64: | ||
| 89 | ret.type = Type::U64; | ||
| 90 | ret.imm_u64 = Common::BitCast<u64>(value.F64()); | ||
| 91 | break; | ||
| 92 | default: | ||
| 93 | throw NotImplementedException("Immediate type {}", value.Type()); | ||
| 94 | } | ||
| 95 | return ret; | ||
| 96 | } | ||
| 97 | |||
| 98 | Register RegAlloc::Define(IR::Inst& inst, bool is_long) { | ||
| 99 | if (inst.HasUses()) { | ||
| 100 | inst.SetDefinition<Id>(Alloc(is_long)); | ||
| 101 | } else { | ||
| 102 | Id id{}; | ||
| 103 | id.is_long.Assign(is_long ? 1 : 0); | ||
| 104 | id.is_null.Assign(1); | ||
| 105 | inst.SetDefinition<Id>(id); | ||
| 106 | } | ||
| 107 | return Register{PeekInst(inst)}; | ||
| 108 | } | ||
| 109 | |||
| 110 | Value RegAlloc::PeekInst(IR::Inst& inst) { | ||
| 111 | Value ret; | ||
| 112 | ret.type = Type::Register; | ||
| 113 | ret.id = inst.Definition<Id>(); | ||
| 114 | return ret; | ||
| 115 | } | ||
| 116 | |||
| 117 | Value RegAlloc::ConsumeInst(IR::Inst& inst) { | ||
| 118 | Unref(inst); | ||
| 119 | return PeekInst(inst); | ||
| 120 | } | ||
| 121 | |||
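| | // Linear scan for the first free slot; num_used_* records the high-water mark reported | ||
| | // through NumUsedRegisters()/NumUsedLongRegisters(). | ||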
| 122 | Id RegAlloc::Alloc(bool is_long) { | ||
| 123 | size_t& num_regs{is_long ? num_used_long_registers : num_used_registers}; | ||
| 124 | std::bitset<NUM_REGS>& use{is_long ? long_register_use : register_use}; | ||
| 125 | if (num_used_registers + num_used_long_registers < NUM_REGS) { | ||
| 126 | for (size_t reg = 0; reg < NUM_REGS; ++reg) { | ||
| 127 | if (use[reg]) { | ||
| 128 | continue; | ||
| 129 | } | ||
| 130 | num_regs = std::max(num_regs, reg + 1); | ||
| 131 | use[reg] = true; | ||
| 132 | Id ret{}; | ||
| 133 | ret.is_valid.Assign(1); | ||
| 134 | ret.is_long.Assign(is_long ? 1 : 0); | ||
| 135 | ret.is_spill.Assign(0); | ||
| 136 | ret.is_condition_code.Assign(0); | ||
| 137 | ret.is_null.Assign(0); | ||
| 138 | ret.index.Assign(static_cast<u32>(reg)); | ||
| 139 | return ret; | ||
| 140 | } | ||
| 141 | } | ||
| 142 | throw NotImplementedException("Register spilling"); | ||
| 143 | } | ||
| 144 | |||
| 145 | void RegAlloc::Free(Id id) { | ||
| 146 | if (id.is_valid == 0) { | ||
| 147 | throw LogicError("Freeing invalid register"); | ||
| 148 | } | ||
| 149 | if (id.is_spill != 0) { | ||
| 150 | throw NotImplementedException("Free spill"); | ||
| 151 | } | ||
| 152 | if (id.is_long != 0) { | ||
| 153 | long_register_use[id.index] = false; | ||
| 154 | } else { | ||
| 155 | register_use[id.index] = false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | /*static*/ bool RegAlloc::IsAliased(const IR::Inst& inst) { | ||
| 160 | switch (inst.GetOpcode()) { | ||
| 161 | case IR::Opcode::Identity: | ||
| 162 | case IR::Opcode::BitCastU16F16: | ||
| 163 | case IR::Opcode::BitCastU32F32: | ||
| 164 | case IR::Opcode::BitCastU64F64: | ||
| 165 | case IR::Opcode::BitCastF16U16: | ||
| 166 | case IR::Opcode::BitCastF32U32: | ||
| 167 | case IR::Opcode::BitCastF64U64: | ||
| 168 | return true; | ||
| 169 | default: | ||
| 170 | return false; | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
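| | // Walks Identity/BitCast chains back to the instruction that owns the register, | ||
| | // stopping early if the chain reaches an immediate. | ||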
| 174 | /*static*/ IR::Inst& RegAlloc::AliasInst(IR::Inst& inst) { | ||
| 175 | IR::Inst* it{&inst}; | ||
| 176 | while (IsAliased(*it)) { | ||
| 177 | const IR::Value arg{it->Arg(0)}; | ||
| 178 | if (arg.IsImmediate()) { | ||
| 179 | break; | ||
| 180 | } | ||
| 181 | it = arg.InstRecursive(); | ||
| 182 | } | ||
| 183 | return *it; | ||
| 184 | } | ||
| 185 | |||
| 186 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h new file mode 100644 index 000000000..82aec66c6 --- /dev/null +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h | |||
| @@ -0,0 +1,303 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/bit_cast.h" | ||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "shader_recompiler/exception.h" | ||
| 15 | |||
| 16 | namespace Shader::IR { | ||
| 17 | class Inst; | ||
| 18 | class Value; | ||
| 19 | } // namespace Shader::IR | ||
| 20 | |||
| 21 | namespace Shader::Backend::GLASM { | ||
| 22 | |||
| 23 | class EmitContext; | ||
| 24 | |||
| 25 | enum class Type : u32 { | ||
| 26 | Void, | ||
| 27 | Register, | ||
| 28 | U32, | ||
| 29 | U64, | ||
| 30 | }; | ||
| 31 | |||
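| | // Packed register handle; it fits in 32 bits so it can be stored as an IR instruction definition. | ||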
| 32 | struct Id { | ||
| 33 | union { | ||
| 34 | u32 raw; | ||
| 35 | BitField<0, 1, u32> is_valid; | ||
| 36 | BitField<1, 1, u32> is_long; | ||
| 37 | BitField<2, 1, u32> is_spill; | ||
| 38 | BitField<3, 1, u32> is_condition_code; | ||
| 39 | BitField<4, 1, u32> is_null; | ||
| 40 | BitField<5, 27, u32> index; | ||
| 41 | }; | ||
| 42 | |||
| 43 | bool operator==(Id rhs) const noexcept { | ||
| 44 | return raw == rhs.raw; | ||
| 45 | } | ||
| 46 | bool operator!=(Id rhs) const noexcept { | ||
| 47 | return !operator==(rhs); | ||
| 48 | } | ||
| 49 | }; | ||
| 50 | static_assert(sizeof(Id) == sizeof(u32)); | ||
| 51 | |||
| 52 | struct Value { | ||
| 53 | Type type; | ||
| 54 | union { | ||
| 55 | Id id; | ||
| 56 | u32 imm_u32; | ||
| 57 | u64 imm_u64; | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool operator==(const Value& rhs) const noexcept { | ||
| 61 | if (type != rhs.type) { | ||
| 62 | return false; | ||
| 63 | } | ||
| 64 | switch (type) { | ||
| 65 | case Type::Void: | ||
| 66 | return true; | ||
| 67 | case Type::Register: | ||
| 68 | return id == rhs.id; | ||
| 69 | case Type::U32: | ||
| 70 | return imm_u32 == rhs.imm_u32; | ||
| 71 | case Type::U64: | ||
| 72 | return imm_u64 == rhs.imm_u64; | ||
| 73 | } | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | bool operator!=(const Value& rhs) const noexcept { | ||
| 77 | return !operator==(rhs); | ||
| 78 | } | ||
| 79 | }; | ||
| 80 | struct Register : Value {}; | ||
| 81 | struct ScalarRegister : Value {}; | ||
| 82 | struct ScalarU32 : Value {}; | ||
| 83 | struct ScalarS32 : Value {}; | ||
| 84 | struct ScalarF32 : Value {}; | ||
| 85 | struct ScalarF64 : Value {}; | ||
| 86 | |||
| 87 | class RegAlloc { | ||
| 88 | public: | ||
| 89 | RegAlloc() = default; | ||
| 90 | |||
| 91 | Register Define(IR::Inst& inst); | ||
| 92 | |||
| 93 | Register LongDefine(IR::Inst& inst); | ||
| 94 | |||
| 95 | [[nodiscard]] Value Peek(const IR::Value& value); | ||
| 96 | |||
| 97 | Value Consume(const IR::Value& value); | ||
| 98 | |||
| 99 | void Unref(IR::Inst& inst); | ||
| 100 | |||
| 101 | [[nodiscard]] Register AllocReg(); | ||
| 102 | |||
| 103 | [[nodiscard]] Register AllocLongReg(); | ||
| 104 | |||
| 105 | void FreeReg(Register reg); | ||
| 106 | |||
| 107 | void InvalidateConditionCodes() { | ||
| 108 | // No-op for now: condition codes are not tracked yet, so there is nothing to invalidate | ||
| 109 | } | ||
| 110 | |||
| 111 | [[nodiscard]] size_t NumUsedRegisters() const noexcept { | ||
| 112 | return num_used_registers; | ||
| 113 | } | ||
| 114 | |||
| 115 | [[nodiscard]] size_t NumUsedLongRegisters() const noexcept { | ||
| 116 | return num_used_long_registers; | ||
| 117 | } | ||
| 118 | |||
| 119 | [[nodiscard]] bool IsEmpty() const noexcept { | ||
| 120 | return register_use.none() && long_register_use.none(); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns true if the instruction is expected to be aliased to another instruction | ||
| 124 | static bool IsAliased(const IR::Inst& inst); | ||
| 125 | |||
| 126 | /// Returns the instruction at the root of an alias sequence | ||
| 127 | static IR::Inst& AliasInst(IR::Inst& inst); | ||
| 128 | |||
| 129 | private: | ||
| 130 | static constexpr size_t NUM_REGS = 4096; | ||
| 131 | static constexpr size_t NUM_ELEMENTS = 4; | ||
| 132 | |||
| 133 | Value MakeImm(const IR::Value& value); | ||
| 134 | |||
| 135 | Register Define(IR::Inst& inst, bool is_long); | ||
| 136 | |||
| 137 | Value PeekInst(IR::Inst& inst); | ||
| 138 | |||
| 139 | Value ConsumeInst(IR::Inst& inst); | ||
| 140 | |||
| 141 | Id Alloc(bool is_long); | ||
| 142 | |||
| 143 | void Free(Id id); | ||
| 144 | |||
| 145 | size_t num_used_registers{}; | ||
| 146 | size_t num_used_long_registers{}; | ||
| 147 | std::bitset<NUM_REGS> register_use{}; | ||
| 148 | std::bitset<NUM_REGS> long_register_use{}; | ||
| 149 | }; | ||
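| | |||
| | // Illustrative sketch with hypothetical names (not part of the interface): an emitter | ||
| | // pairs Define() with the fmt specializations below, e.g. | ||
| | //   ctx.Add("MOV.S {}.x,{};", ctx.reg_alloc.Define(inst), some_scalar); // emits e.g. "MOV.S R7.x,..." | ||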
| 150 | |||
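| | // Renders an Id as its GLASM name: R{n}/D{n} for short/long registers, RC/DC for null | ||
| | // (unused) definitions; the scalar variant appends ".x". | ||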
| 151 | template <bool scalar, typename FormatContext> | ||
| 152 | auto FormatTo(FormatContext& ctx, Id id) { | ||
| 153 | if (id.is_condition_code != 0) { | ||
| 154 | throw NotImplementedException("Condition code emission"); | ||
| 155 | } | ||
| 156 | if (id.is_spill != 0) { | ||
| 157 | throw NotImplementedException("Spill emission"); | ||
| 158 | } | ||
| 159 | if constexpr (scalar) { | ||
| 160 | if (id.is_null != 0) { | ||
| 161 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); | ||
| 162 | } | ||
| 163 | if (id.is_long != 0) { | ||
| 164 | return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); | ||
| 165 | } else { | ||
| 166 | return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); | ||
| 167 | } | ||
| 168 | } else { | ||
| 169 | if (id.is_null != 0) { | ||
| 170 | return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); | ||
| 171 | } | ||
| 172 | if (id.is_long != 0) { | ||
| 173 | return fmt::format_to(ctx.out(), "D{}", id.index.Value()); | ||
| 174 | } else { | ||
| 175 | return fmt::format_to(ctx.out(), "R{}", id.index.Value()); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | } // namespace Shader::Backend::GLASM | ||
| 181 | |||
| 182 | template <> | ||
| 183 | struct fmt::formatter<Shader::Backend::GLASM::Id> { | ||
| 184 | constexpr auto parse(format_parse_context& ctx) { | ||
| 185 | return ctx.begin(); | ||
| 186 | } | ||
| 187 | template <typename FormatContext> | ||
| 188 | auto format(Shader::Backend::GLASM::Id id, FormatContext& ctx) { | ||
| 189 | return Shader::Backend::GLASM::FormatTo<true>(ctx, id); | ||
| 190 | } | ||
| 191 | }; | ||
| 192 | |||
| 193 | template <> | ||
| 194 | struct fmt::formatter<Shader::Backend::GLASM::Register> { | ||
| 195 | constexpr auto parse(format_parse_context& ctx) { | ||
| 196 | return ctx.begin(); | ||
| 197 | } | ||
| 198 | template <typename FormatContext> | ||
| 199 | auto format(const Shader::Backend::GLASM::Register& value, FormatContext& ctx) { | ||
| 200 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 201 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 202 | } | ||
| 203 | return Shader::Backend::GLASM::FormatTo<false>(ctx, value.id); | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | template <> | ||
| 208 | struct fmt::formatter<Shader::Backend::GLASM::ScalarRegister> { | ||
| 209 | constexpr auto parse(format_parse_context& ctx) { | ||
| 210 | return ctx.begin(); | ||
| 211 | } | ||
| 212 | template <typename FormatContext> | ||
| 213 | auto format(const Shader::Backend::GLASM::ScalarRegister& value, FormatContext& ctx) { | ||
| 214 | if (value.type != Shader::Backend::GLASM::Type::Register) { | ||
| 215 | throw Shader::InvalidArgument("Register value type is not register"); | ||
| 216 | } | ||
| 217 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 218 | } | ||
| 219 | }; | ||
| 220 | |||
| 221 | template <> | ||
| 222 | struct fmt::formatter<Shader::Backend::GLASM::ScalarU32> { | ||
| 223 | constexpr auto parse(format_parse_context& ctx) { | ||
| 224 | return ctx.begin(); | ||
| 225 | } | ||
| 226 | template <typename FormatContext> | ||
| 227 | auto format(const Shader::Backend::GLASM::ScalarU32& value, FormatContext& ctx) { | ||
| 228 | switch (value.type) { | ||
| 229 | case Shader::Backend::GLASM::Type::Void: | ||
| 230 | break; | ||
| 231 | case Shader::Backend::GLASM::Type::Register: | ||
| 232 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 233 | case Shader::Backend::GLASM::Type::U32: | ||
| 234 | return fmt::format_to(ctx.out(), "{}", value.imm_u32); | ||
| 235 | case Shader::Backend::GLASM::Type::U64: | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 239 | } | ||
| 240 | }; | ||
| 241 | |||
| 242 | template <> | ||
| 243 | struct fmt::formatter<Shader::Backend::GLASM::ScalarS32> { | ||
| 244 | constexpr auto parse(format_parse_context& ctx) { | ||
| 245 | return ctx.begin(); | ||
| 246 | } | ||
| 247 | template <typename FormatContext> | ||
| 248 | auto format(const Shader::Backend::GLASM::ScalarS32& value, FormatContext& ctx) { | ||
| 249 | switch (value.type) { | ||
| 250 | case Shader::Backend::GLASM::Type::Void: | ||
| 251 | break; | ||
| 252 | case Shader::Backend::GLASM::Type::Register: | ||
| 253 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 254 | case Shader::Backend::GLASM::Type::U32: | ||
| 255 | return fmt::format_to(ctx.out(), "{}", static_cast<s32>(value.imm_u32)); | ||
| 256 | case Shader::Backend::GLASM::Type::U64: | ||
| 257 | break; | ||
| 258 | } | ||
| 259 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 260 | } | ||
| 261 | }; | ||
| 262 | |||
| 263 | template <> | ||
| 264 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF32> { | ||
| 265 | constexpr auto parse(format_parse_context& ctx) { | ||
| 266 | return ctx.begin(); | ||
| 267 | } | ||
| 268 | template <typename FormatContext> | ||
| 269 | auto format(const Shader::Backend::GLASM::ScalarF32& value, FormatContext& ctx) { | ||
| 270 | switch (value.type) { | ||
| 271 | case Shader::Backend::GLASM::Type::Void: | ||
| 272 | break; | ||
| 273 | case Shader::Backend::GLASM::Type::Register: | ||
| 274 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 275 | case Shader::Backend::GLASM::Type::U32: | ||
| 276 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f32>(value.imm_u32)); | ||
| 277 | case Shader::Backend::GLASM::Type::U64: | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 281 | } | ||
| 282 | }; | ||
| 283 | |||
| 284 | template <> | ||
| 285 | struct fmt::formatter<Shader::Backend::GLASM::ScalarF64> { | ||
| 286 | constexpr auto parse(format_parse_context& ctx) { | ||
| 287 | return ctx.begin(); | ||
| 288 | } | ||
| 289 | template <typename FormatContext> | ||
| 290 | auto format(const Shader::Backend::GLASM::ScalarF64& value, FormatContext& ctx) { | ||
| 291 | switch (value.type) { | ||
| 292 | case Shader::Backend::GLASM::Type::Void: | ||
| 293 | break; | ||
| 294 | case Shader::Backend::GLASM::Type::Register: | ||
| 295 | return Shader::Backend::GLASM::FormatTo<true>(ctx, value.id); | ||
| 296 | case Shader::Backend::GLASM::Type::U32: | ||
| 297 | break; | ||
| 298 | case Shader::Backend::GLASM::Type::U64: | ||
| 299 | return fmt::format_to(ctx.out(), "{}", Common::BitCast<f64>(value.imm_u64)); | ||
| 300 | } | ||
| 301 | throw Shader::InvalidArgument("Invalid value type {}", value.type); | ||
| 302 | } | ||
| 303 | }; | ||