| author | 2021-07-25 11:39:04 -0700 |
|---|---|
| committer | 2021-07-25 11:39:04 -0700 |
| commit | 98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch) |
| tree | 816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp |
| parent | Merge pull request #6699 from lat9nq/common-threads (diff) |
| parent | shader: Support out of bound local memory reads and immediate writes (diff) |
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 203 |
1 file changed, 203 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
new file mode 100644
index 000000000..78b1e1ba7
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -0,0 +1,203 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"

namespace Shader::Backend::SPIRV {
namespace {
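// Picks the 32-bit word of a ballot/mask vector indexed by this invocation's subgroup-local id,
// for hosts whose subgroups may be wider than the guest's 32-lane warp.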
Id WarpExtract(EmitContext& ctx, Id value) {
    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
}

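// Loads a subgroup mask built-in (uvec4); the low word suffices for 32-lane hosts, otherwise the
// relevant word is taken with WarpExtract.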
Id LoadMask(EmitContext& ctx, Id mask) {
    const Id value{ctx.OpLoad(ctx.U32[4], mask)};
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return ctx.OpCompositeExtract(ctx.U32[1], value, 0U);
    }
    return WarpExtract(ctx, value);
}

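// Forwards the in-bounds predicate of a shuffle to its GetInBoundsFromOp pseudo-instruction,
// if the IR queries it.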
void SetInBoundsFlag(IR::Inst* inst, Id result) {
    IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
    if (!in_bounds) {
        return;
    }
    in_bounds->SetDefinition(result);
    in_bounds->Invalidate();
}

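// Segment arithmetic for the guest shuffle instructions: the segmentation mask selects the first
// lane of the caller's segment, and clamp (restricted to bits outside the mask) gives the last
// readable lane.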
Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) {
    return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask);
}

Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) {
    return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id,
                           ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask));
}

Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) {
    const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
    const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
    return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask);
}

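// Reads the value from src_thread_id when in range, otherwise keeps this lane's own value.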
Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
    return ctx.OpSelect(ctx.U32[1], in_range,
                        ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
}
} // Anonymous namespace

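// Lane index within the guest warp; masked to 32 lanes when the host subgroup may be wider.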
Id EmitLaneId(EmitContext& ctx) {
    const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return id;
    }
    return ctx.OpBitwiseAnd(ctx.U32[1], id, ctx.Const(31U));
}

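// With a matching warp size this is a plain subgroup vote. Otherwise the vote is rebuilt from
// ballots: every active lane must appear in the predicate's ballot.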
Id EmitVoteAll(EmitContext& ctx, Id pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return ctx.OpSubgroupAllKHR(ctx.U1, pred);
    }
    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
    const Id active_mask{WarpExtract(ctx, mask_ballot)};
    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
    return ctx.OpIEqual(ctx.U1, lhs, active_mask);
}

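// As with VoteAll, but the result is true when any active lane has the predicate set
// (the masked ballot is compared against zero).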
Id EmitVoteAny(EmitContext& ctx, Id pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
    }
    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
    const Id active_mask{WarpExtract(ctx, mask_ballot)};
    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
    return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
}

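// True when the active lanes agree on the predicate: the ballot XORed with the active mask is
// either zero (all true) or the active mask itself (all false).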
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
    }
    const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
    const Id active_mask{WarpExtract(ctx, mask_ballot)};
    const Id ballot{WarpExtract(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
    const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
    return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
                           ctx.OpIEqual(ctx.U1, lhs, active_mask));
}

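// Ballot of the predicate, reduced to a single 32-bit word (the low word, or the word selected
// by WarpExtract on wider subgroups).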
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
    const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
        return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
    }
    return WarpExtract(ctx, ballot);
}

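// Subgroup comparison masks (eq/lt/le/gt/ge), each reduced to one 32-bit word by LoadMask.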
Id EmitSubgroupEqMask(EmitContext& ctx) {
    return LoadMask(ctx, ctx.subgroup_mask_eq);
}

Id EmitSubgroupLtMask(EmitContext& ctx) {
    return LoadMask(ctx, ctx.subgroup_mask_lt);
}

Id EmitSubgroupLeMask(EmitContext& ctx) {
    return LoadMask(ctx, ctx.subgroup_mask_le);
}

Id EmitSubgroupGtMask(EmitContext& ctx) {
    return LoadMask(ctx, ctx.subgroup_mask_gt);
}

Id EmitSubgroupGeMask(EmitContext& ctx) {
    return LoadMask(ctx, ctx.subgroup_mask_ge);
}

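// Indexed shuffle: the source lane is the requested index folded into the caller's segment;
// the read is in bounds while it does not exceed the segment's clamped maximum lane.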
Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                    Id segmentation_mask) {
    const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
    const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};

    const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)};
    const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)};
    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};

    SetInBoundsFlag(inst, in_range);
    return SelectValue(ctx, in_range, value, src_thread_id);
}

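// Shuffle up: reads from lane (thread_id - index); in bounds while the source lane does not
// fall below the bound derived from clamp and the segmentation mask.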
Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                 Id segmentation_mask) {
    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
    const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
    const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};

    SetInBoundsFlag(inst, in_range);
    return SelectValue(ctx, in_range, value, src_thread_id);
}

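// Shuffle down: reads from lane (thread_id + index); in bounds while the source lane does not
// exceed the segment's maximum lane.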
Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                   Id segmentation_mask) {
    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
    const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};

    SetInBoundsFlag(inst, in_range);
    return SelectValue(ctx, in_range, value, src_thread_id);
}

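// Butterfly shuffle: reads from lane (thread_id ^ index), with the same upper-bound check.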
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                        Id segmentation_mask) {
    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
    const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
    const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};

    SetInBoundsFlag(inst, in_range);
    return SelectValue(ctx, in_range, value, src_thread_id);
}

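// Swizzled floating-point add: each lane extracts a 2-bit code from the swizzle immediate
// (bits 2 * (lane & 3)) and uses it to pick per-operand multipliers from the fswzadd lookup
// tables before adding.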
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
    const Id three{ctx.Const(3U)};
    Id mask{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);
    mask = ctx.OpShiftLeftLogical(ctx.U32[1], mask, ctx.Const(1U));
    mask = ctx.OpShiftRightLogical(ctx.U32[1], swizzle, mask);
    mask = ctx.OpBitwiseAnd(ctx.U32[1], mask, three);

    const Id modifier_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, mask)};
    const Id modifier_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, mask)};

    const Id result_a{ctx.OpFMul(ctx.F32[1], op_a, modifier_a)};
    const Id result_b{ctx.OpFMul(ctx.F32[1], op_b, modifier_b)};
    return ctx.OpFAdd(ctx.F32[1], result_a, result_b);
}

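// Screen-space derivatives map directly onto the corresponding SPIR-V instructions.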
Id EmitDPdxFine(EmitContext& ctx, Id op_a) {
    return ctx.OpDPdxFine(ctx.F32[1], op_a);
}

Id EmitDPdyFine(EmitContext& ctx, Id op_a) {
    return ctx.OpDPdyFine(ctx.F32[1], op_a);
}

Id EmitDPdxCoarse(EmitContext& ctx, Id op_a) {
    return ctx.OpDPdxCoarse(ctx.F32[1], op_a);
}

Id EmitDPdyCoarse(EmitContext& ctx, Id op_a) {
    return ctx.OpDPdyCoarse(ctx.F32[1], op_a);
}

} // namespace Shader::Backend::SPIRV