diff options
| author | 2021-08-30 21:11:16 -0400 | |
|---|---|---|
| committer | 2021-08-31 13:40:39 -0400 | |
| commit | a5d9dcf3d916500a2955a655ed71e225746217bf (patch) | |
| tree | 89341fbaeadfe999d02fbcd3175cc4755a7cdf72 /src | |
| parent | emit_spirv_warp: Fix ballot related ops for 64-thread warp sizes (diff) | |
| download | yuzu-a5d9dcf3d916500a2955a655ed71e225746217bf.tar.gz yuzu-a5d9dcf3d916500a2955a655ed71e225746217bf.tar.xz yuzu-a5d9dcf3d916500a2955a655ed71e225746217bf.zip | |
emit_spirv_warp: Fix shuffle ops for 64-thread warp sizes
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 30 |
1 file changed, 29 insertions, 1 deletion
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index e7e7b4da1..cef52c56e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
| @@ -53,10 +53,21 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | |||
| 53 | return ctx.OpSelect(ctx.U32[1], in_range, | 53 | return ctx.OpSelect(ctx.U32[1], in_range, |
| 54 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | 54 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); |
| 55 | } | 55 | } |
| 56 | |||
| 57 | Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { | ||
| 58 | const Id thirty_two{ctx.Const(32u)}; | ||
| 59 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; | ||
| 60 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 61 | return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 62 | } | ||
| 56 | } // Anonymous namespace | 63 | } // Anonymous namespace |
| 57 | 64 | ||
| 58 | Id EmitLaneId(EmitContext& ctx) { | 65 | Id EmitLaneId(EmitContext& ctx) { |
| 59 | return GetThreadId(ctx); | 66 | const Id id{GetThreadId(ctx)}; |
| 67 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 68 | return id; | ||
| 69 | } | ||
| 70 | return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); | ||
| 60 | } | 71 | } |
| 61 | 72 | ||
| 62 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | 73 | Id EmitVoteAll(EmitContext& ctx, Id pred) { |
| @@ -125,6 +136,14 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla | |||
| 125 | Id segmentation_mask) { | 136 | Id segmentation_mask) { |
| 126 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | 137 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; |
| 127 | const Id thread_id{GetThreadId(ctx)}; | 138 | const Id thread_id{GetThreadId(ctx)}; |
| 139 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 140 | const Id thirty_two{ctx.Const(32u)}; | ||
| 141 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; | ||
| 142 | const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; | ||
| 143 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 144 | index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); | ||
| 145 | clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 146 | } | ||
| 128 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | 147 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; |
| 129 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | 148 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; |
| 130 | 149 | ||
| @@ -139,6 +158,9 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla | |||
| 139 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 158 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 140 | Id segmentation_mask) { | 159 | Id segmentation_mask) { |
| 141 | const Id thread_id{GetThreadId(ctx)}; | 160 | const Id thread_id{GetThreadId(ctx)}; |
| 161 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 162 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 163 | } | ||
| 142 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 164 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 143 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | 165 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; |
| 144 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 166 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -150,6 +172,9 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | |||
| 150 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 172 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 151 | Id segmentation_mask) { | 173 | Id segmentation_mask) { |
| 152 | const Id thread_id{GetThreadId(ctx)}; | 174 | const Id thread_id{GetThreadId(ctx)}; |
| 175 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 176 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 177 | } | ||
| 153 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 178 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 154 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | 179 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; |
| 155 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 180 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -161,6 +186,9 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam | |||
| 161 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 186 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 162 | Id segmentation_mask) { | 187 | Id segmentation_mask) { |
| 163 | const Id thread_id{GetThreadId(ctx)}; | 188 | const Id thread_id{GetThreadId(ctx)}; |
| 189 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 190 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 191 | } | ||
| 164 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 192 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 165 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | 193 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; |
| 166 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 194 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |