diff options
Diffstat (limited to 'src/shader_recompiler/backend/spirv')
4 files changed, 70 insertions, 42 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index db9c94ce8..0cd87a48f 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -321,8 +321,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 321 | case IR::Attribute::PositionY: | 321 | case IR::Attribute::PositionY: |
| 322 | case IR::Attribute::PositionZ: | 322 | case IR::Attribute::PositionZ: |
| 323 | case IR::Attribute::PositionW: | 323 | case IR::Attribute::PositionW: |
| 324 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, | 324 | return ctx.OpLoad( |
| 325 | ctx.Const(element))); | 325 | ctx.F32[1], |
| 326 | ctx.need_input_position_indirect | ||
| 327 | ? AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.u32_zero_value, | ||
| 328 | ctx.Const(element)) | ||
| 329 | : AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, ctx.Const(element))); | ||
| 326 | case IR::Attribute::InstanceId: | 330 | case IR::Attribute::InstanceId: |
| 327 | if (ctx.profile.support_vertex_instance_id) { | 331 | if (ctx.profile.support_vertex_instance_id) { |
| 328 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); | 332 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 2c90f2368..c5db19d09 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -58,11 +58,10 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | |||
| 58 | ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); | 58 | ctx.OpGroupNonUniformShuffle(ctx.U32[1], SubgroupScope(ctx), value, src_thread_id), value); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { | 61 | Id AddPartitionBase(EmitContext& ctx, Id thread_id) { |
| 62 | const Id thirty_two{ctx.Const(32u)}; | 62 | const Id partition_idx{ctx.OpShiftRightLogical(ctx.U32[1], GetThreadId(ctx), ctx.Const(5u))}; |
| 63 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; | 63 | const Id partition_base{ctx.OpShiftLeftLogical(ctx.U32[1], partition_idx, ctx.Const(5u))}; |
| 64 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | 64 | return ctx.OpIAdd(ctx.U32[1], thread_id, partition_base); |
| 65 | return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 66 | } | 65 | } |
| 67 | } // Anonymous namespace | 66 | } // Anonymous namespace |
| 68 | 67 | ||
| @@ -145,64 +144,63 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { | |||
| 145 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 144 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 146 | Id segmentation_mask) { | 145 | Id segmentation_mask) { |
| 147 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | 146 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; |
| 148 | const Id thread_id{GetThreadId(ctx)}; | 147 | const Id thread_id{EmitLaneId(ctx)}; |
| 149 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 150 | const Id thirty_two{ctx.Const(32u)}; | ||
| 151 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; | ||
| 152 | const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; | ||
| 153 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 154 | index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); | ||
| 155 | clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 156 | } | ||
| 157 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | 148 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; |
| 158 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | 149 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; |
| 159 | 150 | ||
| 160 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | 151 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; |
| 161 | const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | 152 | Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; |
| 162 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 153 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| 163 | 154 | ||
| 155 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 156 | src_thread_id = AddPartitionBase(ctx, src_thread_id); | ||
| 157 | } | ||
| 158 | |||
| 164 | SetInBoundsFlag(inst, in_range); | 159 | SetInBoundsFlag(inst, in_range); |
| 165 | return SelectValue(ctx, in_range, value, src_thread_id); | 160 | return SelectValue(ctx, in_range, value, src_thread_id); |
| 166 | } | 161 | } |
| 167 | 162 | ||
| 168 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 163 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 169 | Id segmentation_mask) { | 164 | Id segmentation_mask) { |
| 170 | const Id thread_id{GetThreadId(ctx)}; | 165 | const Id thread_id{EmitLaneId(ctx)}; |
| 171 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 172 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 173 | } | ||
| 174 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 166 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 175 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | 167 | Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; |
| 176 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 168 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| 177 | 169 | ||
| 170 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 171 | src_thread_id = AddPartitionBase(ctx, src_thread_id); | ||
| 172 | } | ||
| 173 | |||
| 178 | SetInBoundsFlag(inst, in_range); | 174 | SetInBoundsFlag(inst, in_range); |
| 179 | return SelectValue(ctx, in_range, value, src_thread_id); | 175 | return SelectValue(ctx, in_range, value, src_thread_id); |
| 180 | } | 176 | } |
| 181 | 177 | ||
| 182 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 178 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 183 | Id segmentation_mask) { | 179 | Id segmentation_mask) { |
| 184 | const Id thread_id{GetThreadId(ctx)}; | 180 | const Id thread_id{EmitLaneId(ctx)}; |
| 185 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 186 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 187 | } | ||
| 188 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 181 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 189 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | 182 | Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; |
| 190 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 183 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| 191 | 184 | ||
| 185 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 186 | src_thread_id = AddPartitionBase(ctx, src_thread_id); | ||
| 187 | } | ||
| 188 | |||
| 192 | SetInBoundsFlag(inst, in_range); | 189 | SetInBoundsFlag(inst, in_range); |
| 193 | return SelectValue(ctx, in_range, value, src_thread_id); | 190 | return SelectValue(ctx, in_range, value, src_thread_id); |
| 194 | } | 191 | } |
| 195 | 192 | ||
| 196 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 193 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 197 | Id segmentation_mask) { | 194 | Id segmentation_mask) { |
| 198 | const Id thread_id{GetThreadId(ctx)}; | 195 | const Id thread_id{EmitLaneId(ctx)}; |
| 199 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 200 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 201 | } | ||
| 202 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 196 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 203 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | 197 | Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; |
| 204 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 198 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| 205 | 199 | ||
| 200 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 201 | src_thread_id = AddPartitionBase(ctx, src_thread_id); | ||
| 202 | } | ||
| 203 | |||
| 206 | SetInBoundsFlag(inst, in_range); | 204 | SetInBoundsFlag(inst, in_range); |
| 207 | return SelectValue(ctx, in_range, value, src_thread_id); | 205 | return SelectValue(ctx, in_range, value, src_thread_id); |
| 208 | } | 206 | } |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index ecb2db494..a0c155fdb 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -544,7 +544,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { | |||
| 544 | U16 = Name(TypeInt(16, false), "u16"); | 544 | U16 = Name(TypeInt(16, false), "u16"); |
| 545 | S16 = Name(TypeInt(16, true), "s16"); | 545 | S16 = Name(TypeInt(16, true), "s16"); |
| 546 | } | 546 | } |
| 547 | if (info.uses_int64) { | 547 | if (info.uses_int64 && profile.support_int64) { |
| 548 | AddCapability(spv::Capability::Int64); | 548 | AddCapability(spv::Capability::Int64); |
| 549 | U64 = Name(TypeInt(64, false), "u64"); | 549 | U64 = Name(TypeInt(64, false), "u64"); |
| 550 | } | 550 | } |
| @@ -721,9 +721,21 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | |||
| 721 | size_t label_index{0}; | 721 | size_t label_index{0}; |
| 722 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | 722 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { |
| 723 | AddLabel(labels[label_index]); | 723 | AddLabel(labels[label_index]); |
| 724 | const Id pointer{is_array | 724 | const Id pointer{[&]() { |
| 725 | ? OpAccessChain(input_f32, input_position, vertex, masked_index) | 725 | if (need_input_position_indirect) { |
| 726 | : OpAccessChain(input_f32, input_position, masked_index)}; | 726 | if (is_array) |
| 727 | return OpAccessChain(input_f32, input_position, vertex, u32_zero_value, | ||
| 728 | masked_index); | ||
| 729 | else | ||
| 730 | return OpAccessChain(input_f32, input_position, u32_zero_value, | ||
| 731 | masked_index); | ||
| 732 | } else { | ||
| 733 | if (is_array) | ||
| 734 | return OpAccessChain(input_f32, input_position, vertex, masked_index); | ||
| 735 | else | ||
| 736 | return OpAccessChain(input_f32, input_position, masked_index); | ||
| 737 | } | ||
| 738 | }()}; | ||
| 727 | const Id result{OpLoad(F32[1], pointer)}; | 739 | const Id result{OpLoad(F32[1], pointer)}; |
| 728 | OpReturnValue(result); | 740 | OpReturnValue(result); |
| 729 | ++label_index; | 741 | ++label_index; |
| @@ -1367,12 +1379,25 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1367 | Decorate(layer, spv::Decoration::Flat); | 1379 | Decorate(layer, spv::Decoration::Flat); |
| 1368 | } | 1380 | } |
| 1369 | if (loads.AnyComponent(IR::Attribute::PositionX)) { | 1381 | if (loads.AnyComponent(IR::Attribute::PositionX)) { |
| 1370 | const bool is_fragment{stage != Stage::Fragment}; | 1382 | const bool is_fragment{stage == Stage::Fragment}; |
| 1371 | const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord}; | 1383 | if (!is_fragment && profile.has_broken_spirv_position_input) { |
| 1372 | input_position = DefineInput(*this, F32[4], true, built_in); | 1384 | need_input_position_indirect = true; |
| 1373 | if (profile.support_geometry_shader_passthrough) { | 1385 | |
| 1374 | if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | 1386 | const Id input_position_struct = TypeStruct(F32[4]); |
| 1375 | Decorate(input_position, spv::Decoration::PassthroughNV); | 1387 | input_position = DefineInput(*this, input_position_struct, true); |
| 1388 | |||
| 1389 | MemberDecorate(input_position_struct, 0, spv::Decoration::BuiltIn, | ||
| 1390 | static_cast<unsigned>(spv::BuiltIn::Position)); | ||
| 1391 | Decorate(input_position_struct, spv::Decoration::Block); | ||
| 1392 | } else { | ||
| 1393 | const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::FragCoord | ||
| 1394 | : spv::BuiltIn::Position}; | ||
| 1395 | input_position = DefineInput(*this, F32[4], true, built_in); | ||
| 1396 | |||
| 1397 | if (profile.support_geometry_shader_passthrough) { | ||
| 1398 | if (info.passthrough.AnyComponent(IR::Attribute::PositionX)) { | ||
| 1399 | Decorate(input_position, spv::Decoration::PassthroughNV); | ||
| 1400 | } | ||
| 1376 | } | 1401 | } |
| 1377 | } | 1402 | } |
| 1378 | } | 1403 | } |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 4414a5169..dbc5c55b9 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h | |||
| @@ -280,6 +280,7 @@ public: | |||
| 280 | Id write_global_func_u32x2{}; | 280 | Id write_global_func_u32x2{}; |
| 281 | Id write_global_func_u32x4{}; | 281 | Id write_global_func_u32x4{}; |
| 282 | 282 | ||
| 283 | bool need_input_position_indirect{}; | ||
| 283 | Id input_position{}; | 284 | Id input_position{}; |
| 284 | std::array<Id, 32> input_generics{}; | 285 | std::array<Id, 32> input_generics{}; |
| 285 | 286 | ||