diff options
| author | 2021-06-08 01:55:12 -0400 | |
|---|---|---|
| committer | 2021-07-22 21:51:37 -0400 | |
| commit | 8bb8bbf4ae2ef259857efe49436dfd71758ea092 (patch) | |
| tree | 73c66ae1b91a91ca569ebe0473df12e870fb254a /src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |
| parent | glsl: Implement indexed attribute loads (diff) | |
| download | yuzu-8bb8bbf4ae2ef259857efe49436dfd71758ea092.tar.gz yuzu-8bb8bbf4ae2ef259857efe49436dfd71758ea092.tar.xz yuzu-8bb8bbf4ae2ef259857efe49436dfd71758ea092.zip | |
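glsl: Implement FSWZADD (EmitFSwizzleAdd)
Also adds a work-in-progress implementation of the warp shuffles using the NV thread shuffle intrinsics (shuffleNV, shuffleUpNV, shuffleDownNV, shuffleXorNV).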
Diffstat (limited to 'src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 36 |
1 file changed, 31 insertions, 5 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index 38c49b164..6ced0776c 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
| @@ -35,9 +35,17 @@ std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp, | |||
| 35 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | 35 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; |
| 36 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); | 36 | return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask); |
| 37 | } | 37 | } |
| 38 | |||
| 39 | void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | ||
| 40 | std::string_view value, std::string_view index, | ||
| 41 | [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) { | ||
| 42 | const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)}; | ||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | ||
| 44 | SetInBoundsFlag(ctx, inst); | ||
| 45 | } | ||
| 38 | } // namespace | 46 | } // namespace |
| 39 | 47 | ||
| 40 | void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst) { | 48 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { |
| 41 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); | 49 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); |
| 42 | } | 50 | } |
| 43 | 51 | ||
| @@ -103,6 +111,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | |||
| 103 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 104 | std::string_view index, std::string_view clamp, | 112 | std::string_view index, std::string_view clamp, |
| 105 | std::string_view segmentation_mask) { | 113 | std::string_view segmentation_mask) { |
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | ||
| 116 | return; | ||
| 117 | } | ||
| 106 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; |
| 107 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 108 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; |
| @@ -117,6 +129,10 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 117 | 129 | ||
| 118 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, |
| 119 | std::string_view clamp, std::string_view segmentation_mask) { | 131 | std::string_view clamp, std::string_view segmentation_mask) { |
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | ||
| 134 | return; | ||
| 135 | } | ||
| 120 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 121 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 122 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; |
| @@ -128,6 +144,10 @@ void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std | |||
| 128 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 129 | std::string_view index, std::string_view clamp, | 145 | std::string_view index, std::string_view clamp, |
| 130 | std::string_view segmentation_mask) { | 146 | std::string_view segmentation_mask) { |
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | ||
| 149 | return; | ||
| 150 | } | ||
| 131 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 132 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 133 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; |
| @@ -139,6 +159,10 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 139 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 140 | std::string_view index, std::string_view clamp, | 160 | std::string_view index, std::string_view clamp, |
| 141 | std::string_view segmentation_mask) { | 161 | std::string_view segmentation_mask) { |
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | ||
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | ||
| 164 | return; | ||
| 165 | } | ||
| 142 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; |
| 143 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; |
| 144 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; |
| @@ -147,10 +171,12 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view val | |||
| 147 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
| 148 | } | 172 | } |
| 149 | 173 | ||
| 150 | void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | 174 | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b, |
| 151 | [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, | 175 | std::string_view swizzle) { |
| 152 | [[maybe_unused]] std::string_view swizzle) { | 176 | const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)}; |
| 153 | NotImplemented(); | 177 | const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask); |
| 178 | const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask); | ||
| 179 | ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b); | ||
| 154 | } | 180 | } |
| 155 | 181 | ||
| 156 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { | 182 | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) { |