diff options
Diffstat (limited to 'src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 59 |
1 files changed, 40 insertions, 19 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index 0a488188b..0b85aaba2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -14,51 +14,72 @@ void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | |||
| 14 | if (!in_bounds) { | 14 | if (!in_bounds) { |
| 15 | return; | 15 | return; |
| 16 | } | 16 | } |
| 17 | |||
| 18 | ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); | 17 | ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); |
| 19 | in_bounds->Invalidate(); | 18 | in_bounds->Invalidate(); |
| 20 | } | 19 | } |
| 20 | |||
| 21 | std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) { | ||
| 22 | return fmt::format("({}&{})", thread_id, segmentation_mask); | ||
| 23 | } | ||
| 24 | |||
| 25 | std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp, | ||
| 26 | std::string_view not_seg_mask) { | ||
| 27 | return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask); | ||
| 28 | } | ||
| 29 | |||
| 30 | std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, | ||
| 31 | std::string_view segmentation_mask) { | ||
| 32 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | ||
| 33 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | ||
| 34 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); | ||
| 35 | } | ||
| 21 | } // namespace | 36 | } // namespace |
| 22 | 37 | ||
| 23 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 38 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 24 | std::string_view index, std::string_view clamp, | 39 | std::string_view index, std::string_view clamp, |
| 25 | std::string_view segmentation_mask) { | 40 | std::string_view segmentation_mask) { |
| 26 | ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" | 41 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; |
| 27 | "&~{}));", | 42 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 28 | index, segmentation_mask, clamp, segmentation_mask); | 43 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; |
| 44 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | ||
| 45 | |||
| 46 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | ||
| 47 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | ||
| 48 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 29 | SetInBoundsFlag(ctx, inst); | 49 | SetInBoundsFlag(ctx, inst); |
| 30 | ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); | 50 | ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |
| 31 | } | 51 | } |
| 32 | 52 | ||
| 33 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | 53 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, |
| 34 | std::string_view clamp, std::string_view segmentation_mask) { | 54 | std::string_view clamp, std::string_view segmentation_mask) { |
| 35 | ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" | 55 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 36 | "&~{}));", | 56 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 37 | index, segmentation_mask, clamp, segmentation_mask); | 57 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; |
| 58 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | ||
| 38 | SetInBoundsFlag(ctx, inst); | 59 | SetInBoundsFlag(ctx, inst); |
| 39 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},gl_SubGroupInvocationARB-{}):" | 60 | ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |
| 40 | "{};", | ||
| 41 | inst, value, index, value); | ||
| 42 | } | 61 | } |
| 43 | 62 | ||
| 44 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 63 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 45 | std::string_view index, std::string_view clamp, | 64 | std::string_view index, std::string_view clamp, |
| 46 | std::string_view segmentation_mask) { | 65 | std::string_view segmentation_mask) { |
| 47 | ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" | 66 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 48 | "&~{}));", | 67 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 49 | index, segmentation_mask, clamp, segmentation_mask); | 68 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; |
| 69 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 50 | SetInBoundsFlag(ctx, inst); | 70 | SetInBoundsFlag(ctx, inst); |
| 51 | ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); | 71 | ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |
| 52 | } | 72 | } |
| 53 | 73 | ||
| 54 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 74 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 55 | std::string_view index, std::string_view clamp, | 75 | std::string_view index, std::string_view clamp, |
| 56 | std::string_view segmentation_mask) { | 76 | std::string_view segmentation_mask) { |
| 57 | ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" | 77 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 58 | "&~{}));", | 78 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 59 | index, segmentation_mask, clamp, segmentation_mask); | 79 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; |
| 80 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | ||
| 60 | SetInBoundsFlag(ctx, inst); | 81 | SetInBoundsFlag(ctx, inst); |
| 61 | ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); | 82 | ctx.AddU32("{}=shfl_in_bounds?{}:{};", inst, value, src_thread_id); |
| 62 | } | 83 | } |
| 63 | 84 | ||
| 64 | void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | 85 | void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, |