diff options
| author | 2021-08-30 20:39:21 -0400 | |
|---|---|---|
| committer | 2021-08-31 13:40:12 -0400 | |
| commit | 95213270efbc378561f18cc6eeb4428c2905b154 (patch) | |
| tree | 1c55902fdb3ca12e9264311307057d29ec4e403a /src/shader_recompiler/backend/spirv | |
| parent | Merge pull request #6897 from FernandoS27/pineapple-does-not-belong-in-pizza (diff) | |
| download | yuzu-95213270efbc378561f18cc6eeb4428c2905b154.tar.gz yuzu-95213270efbc378561f18cc6eeb4428c2905b154.tar.xz yuzu-95213270efbc378561f18cc6eeb4428c2905b154.zip | |
emit_spirv_warp: Fix ballot related ops for 64-thread warp sizes
Diffstat (limited to 'src/shader_recompiler/backend/spirv')
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 21 |
1 file changed, 11 insertions, 10 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 78b1e1ba7..e7e7b4da1 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -7,8 +7,13 @@ | |||
| 7 | 7 | ||
| 8 | namespace Shader::Backend::SPIRV { | 8 | namespace Shader::Backend::SPIRV { |
| 9 | namespace { | 9 | namespace { |
| 10 | Id GetThreadId(EmitContext& ctx) { | ||
| 11 | return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); | ||
| 12 | } | ||
| 13 | |||
| 10 | Id WarpExtract(EmitContext& ctx, Id value) { | 14 | Id WarpExtract(EmitContext& ctx, Id value) { |
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 15 | const Id thread_id{GetThreadId(ctx)}; |
| 16 | const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | 17 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); |
| 13 | } | 18 | } |
| 14 | 19 | ||
| @@ -51,11 +56,7 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | |||
| 51 | } // Anonymous namespace | 56 | } // Anonymous namespace |
| 52 | 57 | ||
| 53 | Id EmitLaneId(EmitContext& ctx) { | 58 | Id EmitLaneId(EmitContext& ctx) { |
| 54 | const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 59 | return GetThreadId(ctx); |
| 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 56 | return id; | ||
| 57 | } | ||
| 58 | return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U)); | ||
| 59 | } | 60 | } |
| 60 | 61 | ||
| 61 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | 62 | Id EmitVoteAll(EmitContext& ctx, Id pred) { |
| @@ -123,7 +124,7 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { | |||
| 123 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 124 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 124 | Id segmentation_mask) { | 125 | Id segmentation_mask) { |
| 125 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | 126 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; |
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 127 | const Id thread_id{GetThreadId(ctx)}; |
| 127 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | 128 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; |
| 128 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | 129 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; |
| 129 | 130 | ||
| @@ -137,7 +138,7 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla | |||
| 137 | 138 | ||
| 138 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 139 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 139 | Id segmentation_mask) { | 140 | Id segmentation_mask) { |
| 140 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 141 | const Id thread_id{GetThreadId(ctx)}; |
| 141 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 142 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 142 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | 143 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; |
| 143 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 144 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -148,7 +149,7 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | |||
| 148 | 149 | ||
| 149 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 150 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 150 | Id segmentation_mask) { | 151 | Id segmentation_mask) { |
| 151 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 152 | const Id thread_id{GetThreadId(ctx)}; |
| 152 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 153 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 153 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | 154 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; |
| 154 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 155 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -159,7 +160,7 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam | |||
| 159 | 160 | ||
| 160 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 161 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 161 | Id segmentation_mask) { | 162 | Id segmentation_mask) { |
| 162 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 163 | const Id thread_id{GetThreadId(ctx)}; |
| 163 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 164 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 164 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | 165 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; |
| 165 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 166 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |