diff options
Diffstat (limited to 'src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 217 |
1 file changed, 217 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp new file mode 100644 index 000000000..a982dd8a2 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -0,0 +1,217 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 8 | #include "shader_recompiler/backend/glsl/emit_glsl_instructions.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 10 | #include "shader_recompiler/profile.h" | ||
| 11 | |||
| 12 | namespace Shader::Backend::GLSL { | ||
| 13 | namespace { | ||
| 14 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | ||
| 15 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 16 | if (!in_bounds) { | ||
| 17 | return; | ||
| 18 | } | ||
| 19 | ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); | ||
| 20 | in_bounds->Invalidate(); | ||
| 21 | } | ||
| 22 | |||
// Builds the GLSL expression for the lowest thread id of the shuffle segment
// containing thread_id: only the segmentation-mask bits of the id are kept.
std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
    return fmt::format("({}&{})", thread_id, segmentation_mask);
}
| 26 | |||
// Builds the GLSL expression for the highest reachable thread id: the segment's
// minimum id OR'd with the clamp bits that fall outside the segmentation mask.
std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
                               std::string_view not_seg_mask) {
    return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
}
| 31 | |||
| 32 | std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, | ||
| 33 | std::string_view segmentation_mask) { | ||
| 34 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 35 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 36 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); | ||
| 37 | } | ||
| 38 | |||
| 39 | void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | ||
| 40 | std::string_view value, std::string_view index, | ||
| 41 | [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { | ||
| 42 | const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; | ||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | ||
| 44 | SetInBoundsFlag(ctx, inst); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
// Guest warps are 32 threads wide; mask the host subgroup invocation id down to
// 0-31 so lane ids stay correct on hosts whose subgroups are larger.
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
}
| 51 | |||
| 52 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 53 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 54 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 55 | } else { | ||
| 56 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 57 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 58 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 63 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 64 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | ||
| 65 | } else { | ||
| 66 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 67 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 68 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 74 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||
| 75 | } else { | ||
| 76 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 77 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 78 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 79 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 85 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | ||
| 86 | } else { | ||
| 87 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
// Low 32 bits of gl_SubGroupEqMaskARB: a mask with only this thread's lane bit set.
// NOTE(review): .x drops the upper half on 64-wide host warps — confirm upper
// threads are handled (or unsupported) by the calling configuration.
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
}
| 94 | |||
// Low 32 bits of gl_SubGroupLtMaskARB: lanes strictly below this thread's lane.
// NOTE(review): .x drops the upper half on 64-wide host warps — confirm upper
// threads are handled (or unsupported) by the calling configuration.
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
}
| 98 | |||
// Low 32 bits of gl_SubGroupLeMaskARB: lanes at or below this thread's lane.
// NOTE(review): .x drops the upper half on 64-wide host warps — confirm upper
// threads are handled (or unsupported) by the calling configuration.
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
}
| 102 | |||
// Low 32 bits of gl_SubGroupGtMaskARB: lanes strictly above this thread's lane.
// NOTE(review): .x drops the upper half on 64-wide host warps — confirm upper
// threads are handled (or unsupported) by the calling configuration.
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
}
| 106 | |||
// Low 32 bits of gl_SubGroupGeMaskARB: lanes at or above this thread's lane.
// NOTE(review): .x drops the upper half on 64-wide host warps — confirm upper
// threads are handled (or unsupported) by the calling configuration.
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
}
| 110 | |||
| 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 112 | std::string_view index, std::string_view clamp, | ||
| 113 | std::string_view segmentation_mask) { | ||
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | ||
| 116 | return; | ||
| 117 | } | ||
| 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 121 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | ||
| 122 | |||
| 123 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | ||
| 124 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | ||
| 125 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 126 | SetInBoundsFlag(ctx, inst); | ||
| 127 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 128 | } | ||
| 129 | |||
| 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | ||
| 131 | std::string_view clamp, std::string_view segmentation_mask) { | ||
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | ||
| 139 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | ||
| 140 | SetInBoundsFlag(ctx, inst); | ||
| 141 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 142 | } | ||
| 143 | |||
| 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 145 | std::string_view index, std::string_view clamp, | ||
| 146 | std::string_view segmentation_mask) { | ||
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | ||
| 154 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 155 | SetInBoundsFlag(ctx, inst); | ||
| 156 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 157 | } | ||
| 158 | |||
| 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | ||
| 160 | std::string_view index, std::string_view clamp, | ||
| 161 | std::string_view segmentation_mask) { | ||
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | ||
| 164 | return; | ||
| 165 | } | ||
| 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; | ||
| 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | ||
| 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | ||
| 169 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 170 | SetInBoundsFlag(ctx, inst); | ||
| 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | ||
| 172 | } | ||
| 173 | |||
| 174 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, | ||
| 175 | std::string_view swizzle) { | ||
| 176 | const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; | ||
| 177 | const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); | ||
| 178 | const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); | ||
| 179 | ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); | ||
| 180 | } | ||
| 181 | |||
| 182 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 183 | if (ctx.profile.support_gl_derivative_control) { | ||
| 184 | ctx.AddF32("{}=dFdxFine({});", inst, op_a); | ||
| 185 | } else { | ||
| 186 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx"); | ||
| 187 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 192 | if (ctx.profile.support_gl_derivative_control) { | ||
| 193 | ctx.AddF32("{}=dFdyFine({});", inst, op_a); | ||
| 194 | } else { | ||
| 195 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy"); | ||
| 196 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 201 | if (ctx.profile.support_gl_derivative_control) { | ||
| 202 | ctx.AddF32("{}=dFdxCoarse({});", inst, op_a); | ||
| 203 | } else { | ||
| 204 | LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx"); | ||
| 205 | ctx.AddF32("{}=dFdx({});", inst, op_a); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | |||
| 209 | void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | ||
| 210 | if (ctx.profile.support_gl_derivative_control) { | ||
| 211 | ctx.AddF32("{}=dFdyCoarse({});", inst, op_a); | ||
| 212 | } else { | ||
| 213 | LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy"); | ||
| 214 | ctx.AddF32("{}=dFdy({});", inst, op_a); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } // namespace Shader::Backend::GLSL | ||