summaryrefslogtreecommitdiff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp58
1 files changed, 28 insertions, 30 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 2c90f2368..c5db19d09 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); 58 ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value);
59} 59}
60 60
61Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { 61Id AddPartitionBase(EmitContext& ctx, Id thread_id) {
62 const Id thirty_two{ctx.Const(32u)}; 62 const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))};
63 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; 63 const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))};
64 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; 64 return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base);
65 return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
66} 65}
67} // Anonymous namespace 66} // Anonymous namespace
68 67
@@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
145Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 144Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
146 Id segmentation_mask) { 145 Id segmentation_mask) {
147 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; 146 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
148 const Id thread_id{GetThreadId(ctx)}; 147 const Id thread_id{EmitLaneId(ctx)};
149 if (ctx.profile.warp_size_potentially_larger_than_guest) {
150 const Id thirty_two{ctx.Const(32u)};
151 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
152 const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
153 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
154 index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
155 clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
156 }
157 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; 148 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
158 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; 149 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
159 150
160 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; 151 const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
161 const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; 152 Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
162 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 153 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
163 154
155 if (ctx.profile.warp_size_potentially_larger_than_guest) {
156 src_thread_id = AddPartitionBase(ctx, src_thread_id);
157 }
158
164 SetInBoundsFlag(inst, in_range); 159 SetInBoundsFlag(inst, in_range);
165 return SelectValue(ctx, in_range, value, src_thread_id); 160 return SelectValue(ctx, in_range, value, src_thread_id);
166} 161}
167 162
168Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 163Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
169 Id segmentation_mask) { 164 Id segmentation_mask) {
170 const Id thread_id{GetThreadId(ctx)}; 165 const Id thread_id{EmitLaneId(ctx)};
171 if (ctx.profile.warp_size_potentially_larger_than_guest) {
172 clamp = GetUpperClamp(ctx, thread_id, clamp);
173 }
174 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 166 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
175 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; 167 Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
176 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 168 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
177 169
170 if (ctx.profile.warp_size_potentially_larger_than_guest) {
171 src_thread_id = AddPartitionBase(ctx, src_thread_id);
172 }
173
178 SetInBoundsFlag(inst, in_range); 174 SetInBoundsFlag(inst, in_range);
179 return SelectValue(ctx, in_range, value, src_thread_id); 175 return SelectValue(ctx, in_range, value, src_thread_id);
180} 176}
181 177
182Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 178Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
183 Id segmentation_mask) { 179 Id segmentation_mask) {
184 const Id thread_id{GetThreadId(ctx)}; 180 const Id thread_id{EmitLaneId(ctx)};
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 clamp = GetUpperClamp(ctx, thread_id, clamp);
187 }
188 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 181 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
189 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; 182 Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
190 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 183 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
191 184
185 if (ctx.profile.warp_size_potentially_larger_than_guest) {
186 src_thread_id = AddPartitionBase(ctx, src_thread_id);
187 }
188
192 SetInBoundsFlag(inst, in_range); 189 SetInBoundsFlag(inst, in_range);
193 return SelectValue(ctx, in_range, value, src_thread_id); 190 return SelectValue(ctx, in_range, value, src_thread_id);
194} 191}
195 192
196Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 193Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
197 Id segmentation_mask) { 194 Id segmentation_mask) {
198 const Id thread_id{GetThreadId(ctx)}; 195 const Id thread_id{EmitLaneId(ctx)};
199 if (ctx.profile.warp_size_potentially_larger_than_guest) {
200 clamp = GetUpperClamp(ctx, thread_id, clamp);
201 }
202 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 196 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
203 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; 197 Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
204 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 198 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
205 199
200 if (ctx.profile.warp_size_potentially_larger_than_guest) {
201 src_thread_id = AddPartitionBase(ctx, src_thread_id);
202 }
203
206 SetInBoundsFlag(inst, in_range); 204 SetInBoundsFlag(inst, in_range);
207 return SelectValue(ctx, in_range, value, src_thread_id); 205 return SelectValue(ctx, in_range, value, src_thread_id);
208} 206}