diff options
| author | 2021-03-25 11:31:37 -0400 | |
|---|---|---|
| committer | 2021-07-22 21:51:24 -0400 | |
| commit | 32c5483beb2f79f5d55eb2906f2bfdfa1698bca3 (patch) | |
| tree | bca00dad85f6823746aee66f43dc0cbe2f337481 /src/shader_recompiler/backend | |
| parent | shader: Track first bindless argument instead of the instruction itself (diff) | |
| download | yuzu-32c5483beb2f79f5d55eb2906f2bfdfa1698bca3.tar.gz yuzu-32c5483beb2f79f5d55eb2906f2bfdfa1698bca3.tar.xz yuzu-32c5483beb2f79f5d55eb2906f2bfdfa1698bca3.zip | |
shader: Implement SHFL
Diffstat (limited to 'src/shader_recompiler/backend')
5 files changed, 151 insertions, 60 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea46af244..5db4a9082 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { | |||
| 274 | if (info.uses_local_invocation_id) { | 274 | if (info.uses_local_invocation_id) { |
| 275 | local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); | 275 | local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); |
| 276 | } | 276 | } |
| 277 | if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { | 277 | if (info.uses_subgroup_invocation_id || |
| 278 | (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { | ||
| 278 | subgroup_local_invocation_id = | 279 | subgroup_local_invocation_id = |
| 279 | DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | 280 | DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); |
| 280 | } | 281 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 107403912..cee72f50d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||
| 224 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | 224 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); |
| 225 | ctx.AddCapability(spv::Capability::DrawParameters); | 225 | ctx.AddCapability(spv::Capability::DrawParameters); |
| 226 | } | 226 | } |
| 227 | if (info.uses_subgroup_vote && profile.support_vote) { | 227 | if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { |
| 228 | ctx.AddExtension("SPV_KHR_shader_ballot"); | 228 | ctx.AddExtension("SPV_KHR_shader_ballot"); |
| 229 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | 229 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); |
| 230 | if (!profile.warp_size_potentially_larger_than_guest) { | 230 | if (!profile.warp_size_potentially_larger_than_guest) { |
| @@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { | |||
| 315 | throw LogicError("Unreachable instruction"); | 315 | throw LogicError("Unreachable instruction"); |
| 316 | } | 316 | } |
| 317 | 317 | ||
| 318 | void EmitGetInBoundsFromOp(EmitContext&) { | ||
| 319 | throw LogicError("Unreachable instruction"); | ||
| 320 | } | ||
| 321 | |||
| 318 | } // namespace Shader::Backend::SPIRV | 322 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6d4adafc7..a233a4817 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); | |||
| 158 | void EmitGetCarryFromOp(EmitContext& ctx); | 158 | void EmitGetCarryFromOp(EmitContext& ctx); |
| 159 | void EmitGetOverflowFromOp(EmitContext& ctx); | 159 | void EmitGetOverflowFromOp(EmitContext& ctx); |
| 160 | void EmitGetSparseFromOp(EmitContext& ctx); | 160 | void EmitGetSparseFromOp(EmitContext& ctx); |
| 161 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 161 | Id EmitFPAbs16(EmitContext& ctx, Id value); | 162 | Id EmitFPAbs16(EmitContext& ctx, Id value); |
| 162 | Id EmitFPAbs32(EmitContext& ctx, Id value); | 163 | Id EmitFPAbs32(EmitContext& ctx, Id value); |
| 163 | Id EmitFPAbs64(EmitContext& ctx, Id value); | 164 | Id EmitFPAbs64(EmitContext& ctx, Id value); |
| @@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); | |||
| 355 | Id EmitVoteAny(EmitContext& ctx, Id pred); | 356 | Id EmitVoteAny(EmitContext& ctx, Id pred); |
| 356 | Id EmitVoteEqual(EmitContext& ctx, Id pred); | 357 | Id EmitVoteEqual(EmitContext& ctx, Id pred); |
| 357 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | 358 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); |
| 359 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 360 | Id segmentation_mask); | ||
| 361 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 362 | Id segmentation_mask); | ||
| 363 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 364 | Id segmentation_mask); | ||
| 365 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 366 | Id segmentation_mask); | ||
| 358 | 367 | ||
| 359 | } // namespace Shader::Backend::SPIRV | 368 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp deleted file mode 100644 index a63677ef2..000000000 --- a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||
| 10 | const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||
| 13 | } | ||
| 14 | } // Anonymous namespace | ||
| 15 | |||
| 16 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 17 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 18 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 19 | } | ||
| 20 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 21 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 22 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 23 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 24 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 25 | } | ||
| 26 | |||
| 27 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 28 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 29 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 30 | } | ||
| 31 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 32 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 33 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 34 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 35 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 39 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 40 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 41 | } | ||
| 42 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 43 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 44 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 45 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 46 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 47 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 51 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 52 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 53 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 54 | } | ||
| 55 | return LargeWarpBallot(ctx, ballot); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..44d8a347f --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||
| 10 | const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||
| 13 | } | ||
| 14 | |||
| 15 | void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||
| 16 | IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 17 | if (!in_bounds) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | in_bounds->SetDefinition(result); | ||
| 21 | in_bounds->Invalidate(); | ||
| 22 | } | ||
| 23 | |||
| 24 | Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||
| 25 | return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||
| 26 | } | ||
| 27 | |||
| 28 | Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||
| 29 | return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||
| 30 | ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||
| 31 | } | ||
| 32 | |||
| 33 | Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||
| 34 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 35 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 36 | return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||
| 37 | } | ||
| 38 | |||
| 39 | Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||
| 40 | return ctx.OpSelect(ctx.U32[1], in_range, | ||
| 41 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||
| 42 | } | ||
| 43 | } // Anonymous namespace | ||
| 44 | |||
| 45 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 46 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 47 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 48 | } | ||
| 49 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 50 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 51 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 52 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 53 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 54 | } | ||
| 55 | |||
| 56 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 57 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 58 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 59 | } | ||
| 60 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 61 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 62 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 63 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 64 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 68 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 69 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 70 | } | ||
| 71 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 72 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 73 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 74 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 75 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 76 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 80 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 81 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 82 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 83 | } | ||
| 84 | return LargeWarpBallot(ctx, ballot); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 88 | Id segmentation_mask) { | ||
| 89 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 90 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 91 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 92 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||
| 93 | |||
| 94 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||
| 95 | const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||
| 96 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 97 | |||
| 98 | SetInBoundsFlag(inst, in_range); | ||
| 99 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 103 | Id segmentation_mask) { | ||
| 104 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 105 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 106 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||
| 107 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 108 | |||
| 109 | SetInBoundsFlag(inst, in_range); | ||
| 110 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 111 | } | ||
| 112 | |||
| 113 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 114 | Id segmentation_mask) { | ||
| 115 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 116 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 117 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||
| 118 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 119 | |||
| 120 | SetInBoundsFlag(inst, in_range); | ||
| 121 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 122 | } | ||
| 123 | |||
| 124 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 125 | Id segmentation_mask) { | ||
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 127 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 128 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||
| 129 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 130 | |||
| 131 | SetInBoundsFlag(inst, in_range); | ||
| 132 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Backend::SPIRV | ||