diff options
Diffstat (limited to '')
16 files changed, 284 insertions, 69 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 028e8b2d2..4161783c8 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -16,7 +16,7 @@ add_library(shader_recompiler STATIC | |||
| 16 | backend/spirv/emit_spirv_select.cpp | 16 | backend/spirv/emit_spirv_select.cpp |
| 17 | backend/spirv/emit_spirv_special.cpp | 17 | backend/spirv/emit_spirv_special.cpp |
| 18 | backend/spirv/emit_spirv_undefined.cpp | 18 | backend/spirv/emit_spirv_undefined.cpp |
| 19 | backend/spirv/emit_spirv_vote.cpp | 19 | backend/spirv/emit_spirv_warp.cpp |
| 20 | environment.h | 20 | environment.h |
| 21 | exception.h | 21 | exception.h |
| 22 | file_environment.cpp | 22 | file_environment.cpp |
| @@ -125,6 +125,7 @@ add_library(shader_recompiler STATIC | |||
| 125 | frontend/maxwell/translate/impl/texture_fetch.cpp | 125 | frontend/maxwell/translate/impl/texture_fetch.cpp |
| 126 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | 126 | frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp |
| 127 | frontend/maxwell/translate/impl/vote.cpp | 127 | frontend/maxwell/translate/impl/vote.cpp |
| 128 | frontend/maxwell/translate/impl/warp_shuffle.cpp | ||
| 128 | frontend/maxwell/translate/translate.cpp | 129 | frontend/maxwell/translate/translate.cpp |
| 129 | frontend/maxwell/translate/translate.h | 130 | frontend/maxwell/translate/translate.h |
| 130 | ir_opt/collect_shader_info_pass.cpp | 131 | ir_opt/collect_shader_info_pass.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index ea46af244..5db4a9082 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { | |||
| 274 | if (info.uses_local_invocation_id) { | 274 | if (info.uses_local_invocation_id) { |
| 275 | local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); | 275 | local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); |
| 276 | } | 276 | } |
| 277 | if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { | 277 | if (info.uses_subgroup_invocation_id || |
| 278 | (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { | ||
| 278 | subgroup_local_invocation_id = | 279 | subgroup_local_invocation_id = |
| 279 | DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | 280 | DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); |
| 280 | } | 281 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 107403912..cee72f50d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||
| 224 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | 224 | ctx.AddExtension("SPV_KHR_shader_draw_parameters"); |
| 225 | ctx.AddCapability(spv::Capability::DrawParameters); | 225 | ctx.AddCapability(spv::Capability::DrawParameters); |
| 226 | } | 226 | } |
| 227 | if (info.uses_subgroup_vote && profile.support_vote) { | 227 | if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { |
| 228 | ctx.AddExtension("SPV_KHR_shader_ballot"); | 228 | ctx.AddExtension("SPV_KHR_shader_ballot"); |
| 229 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | 229 | ctx.AddCapability(spv::Capability::SubgroupBallotKHR); |
| 230 | if (!profile.warp_size_potentially_larger_than_guest) { | 230 | if (!profile.warp_size_potentially_larger_than_guest) { |
| @@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { | |||
| 315 | throw LogicError("Unreachable instruction"); | 315 | throw LogicError("Unreachable instruction"); |
| 316 | } | 316 | } |
| 317 | 317 | ||
| 318 | void EmitGetInBoundsFromOp(EmitContext&) { | ||
| 319 | throw LogicError("Unreachable instruction"); | ||
| 320 | } | ||
| 321 | |||
| 318 | } // namespace Shader::Backend::SPIRV | 322 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 6d4adafc7..a233a4817 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); | |||
| 158 | void EmitGetCarryFromOp(EmitContext& ctx); | 158 | void EmitGetCarryFromOp(EmitContext& ctx); |
| 159 | void EmitGetOverflowFromOp(EmitContext& ctx); | 159 | void EmitGetOverflowFromOp(EmitContext& ctx); |
| 160 | void EmitGetSparseFromOp(EmitContext& ctx); | 160 | void EmitGetSparseFromOp(EmitContext& ctx); |
| 161 | void EmitGetInBoundsFromOp(EmitContext& ctx); | ||
| 161 | Id EmitFPAbs16(EmitContext& ctx, Id value); | 162 | Id EmitFPAbs16(EmitContext& ctx, Id value); |
| 162 | Id EmitFPAbs32(EmitContext& ctx, Id value); | 163 | Id EmitFPAbs32(EmitContext& ctx, Id value); |
| 163 | Id EmitFPAbs64(EmitContext& ctx, Id value); | 164 | Id EmitFPAbs64(EmitContext& ctx, Id value); |
| @@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); | |||
| 355 | Id EmitVoteAny(EmitContext& ctx, Id pred); | 356 | Id EmitVoteAny(EmitContext& ctx, Id pred); |
| 356 | Id EmitVoteEqual(EmitContext& ctx, Id pred); | 357 | Id EmitVoteEqual(EmitContext& ctx, Id pred); |
| 357 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | 358 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred); |
| 359 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 360 | Id segmentation_mask); | ||
| 361 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 362 | Id segmentation_mask); | ||
| 363 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 364 | Id segmentation_mask); | ||
| 365 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 366 | Id segmentation_mask); | ||
| 358 | 367 | ||
| 359 | } // namespace Shader::Backend::SPIRV | 368 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp deleted file mode 100644 index a63677ef2..000000000 --- a/src/shader_recompiler/backend/spirv/emit_spirv_vote.cpp +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||
| 10 | const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||
| 13 | } | ||
| 14 | } // Anonymous namespace | ||
| 15 | |||
| 16 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 17 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 18 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 19 | } | ||
| 20 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 21 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 22 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 23 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 24 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 25 | } | ||
| 26 | |||
| 27 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 28 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 29 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 30 | } | ||
| 31 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 32 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 33 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 34 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 35 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 36 | } | ||
| 37 | |||
| 38 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 39 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 40 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 41 | } | ||
| 42 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 43 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 44 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 45 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 46 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 47 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 48 | } | ||
| 49 | |||
| 50 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 51 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 52 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 53 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 54 | } | ||
| 55 | return LargeWarpBallot(ctx, ballot); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp new file mode 100644 index 000000000..44d8a347f --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||
| 10 | const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||
| 13 | } | ||
| 14 | |||
| 15 | void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||
| 16 | IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||
| 17 | if (!in_bounds) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | in_bounds->SetDefinition(result); | ||
| 21 | in_bounds->Invalidate(); | ||
| 22 | } | ||
| 23 | |||
| 24 | Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||
| 25 | return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||
| 26 | } | ||
| 27 | |||
| 28 | Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||
| 29 | return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||
| 30 | ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||
| 31 | } | ||
| 32 | |||
| 33 | Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||
| 34 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 35 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 36 | return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||
| 37 | } | ||
| 38 | |||
| 39 | Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||
| 40 | return ctx.OpSelect(ctx.U32[1], in_range, | ||
| 41 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||
| 42 | } | ||
| 43 | } // Anonymous namespace | ||
| 44 | |||
| 45 | Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||
| 46 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 47 | return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||
| 48 | } | ||
| 49 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 50 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 51 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 52 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 53 | return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||
| 54 | } | ||
| 55 | |||
| 56 | Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||
| 57 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 58 | return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||
| 59 | } | ||
| 60 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 61 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 62 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 63 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||
| 64 | return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||
| 65 | } | ||
| 66 | |||
| 67 | Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||
| 68 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 69 | return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||
| 70 | } | ||
| 71 | const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||
| 72 | const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||
| 73 | const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||
| 74 | const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||
| 75 | return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||
| 76 | ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||
| 77 | } | ||
| 78 | |||
| 79 | Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||
| 80 | const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||
| 81 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 82 | return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||
| 83 | } | ||
| 84 | return LargeWarpBallot(ctx, ballot); | ||
| 85 | } | ||
| 86 | |||
| 87 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 88 | Id segmentation_mask) { | ||
| 89 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||
| 90 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 91 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||
| 92 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||
| 93 | |||
| 94 | const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||
| 95 | const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||
| 96 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 97 | |||
| 98 | SetInBoundsFlag(inst, in_range); | ||
| 99 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 100 | } | ||
| 101 | |||
| 102 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 103 | Id segmentation_mask) { | ||
| 104 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 105 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 106 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||
| 107 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 108 | |||
| 109 | SetInBoundsFlag(inst, in_range); | ||
| 110 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 111 | } | ||
| 112 | |||
| 113 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 114 | Id segmentation_mask) { | ||
| 115 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 116 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 117 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||
| 118 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 119 | |||
| 120 | SetInBoundsFlag(inst, in_range); | ||
| 121 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 122 | } | ||
| 123 | |||
| 124 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||
| 125 | Id segmentation_mask) { | ||
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||
| 127 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||
| 128 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||
| 129 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||
| 130 | |||
| 131 | SetInBoundsFlag(inst, in_range); | ||
| 132 | return SelectValue(ctx, in_range, value, src_thread_id); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 6280c08f6..418b7f5ac 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { | |||
| 374 | return Inst<U1>(Opcode::GetSparseFromOp, op); | 374 | return Inst<U1>(Opcode::GetSparseFromOp, op); |
| 375 | } | 375 | } |
| 376 | 376 | ||
| 377 | U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||
| 378 | return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||
| 379 | } | ||
| 380 | |||
| 377 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | 381 | F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { |
| 378 | if (a.Type() != b.Type()) { | 382 | if (a.Type() != b.Type()) { |
| 379 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | 383 | throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); |
| @@ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { | |||
| 1486 | return Inst<U32>(Opcode::SubgroupBallot, value); | 1490 | return Inst<U32>(Opcode::SubgroupBallot, value); |
| 1487 | } | 1491 | } |
| 1488 | 1492 | ||
| 1493 | U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1494 | const IR::U32& seg_mask) { | ||
| 1495 | return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1499 | const IR::U32& seg_mask) { | ||
| 1500 | return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||
| 1501 | } | ||
| 1502 | |||
| 1503 | U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1504 | const IR::U32& seg_mask) { | ||
| 1505 | return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||
| 1506 | } | ||
| 1507 | |||
| 1508 | U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 1509 | const IR::U32& seg_mask) { | ||
| 1510 | return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||
| 1511 | } | ||
| 1489 | } // namespace Shader::IR | 1512 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ebbda78a9..64738735e 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -104,6 +104,7 @@ public: | |||
| 104 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); | 104 | [[nodiscard]] U1 GetCarryFromOp(const Value& op); |
| 105 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | 105 | [[nodiscard]] U1 GetOverflowFromOp(const Value& op); |
| 106 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); | 106 | [[nodiscard]] U1 GetSparseFromOp(const Value& op); |
| 107 | [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||
| 107 | 108 | ||
| 108 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | 109 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); |
| 109 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | 110 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); |
| @@ -147,7 +148,8 @@ public: | |||
| 147 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | 148 | [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); |
| 148 | [[nodiscard]] F32 FPSqrt(const F32& value); | 149 | [[nodiscard]] F32 FPSqrt(const F32& value); |
| 149 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | 150 | [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); |
| 150 | [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); | 151 | [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, |
| 152 | const F16F32F64& max_value); | ||
| 151 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | 153 | [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); |
| 152 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | 154 | [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); |
| 153 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | 155 | [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); |
| @@ -242,6 +244,14 @@ public: | |||
| 242 | [[nodiscard]] U1 VoteAny(const U1& value); | 244 | [[nodiscard]] U1 VoteAny(const U1& value); |
| 243 | [[nodiscard]] U1 VoteEqual(const U1& value); | 245 | [[nodiscard]] U1 VoteEqual(const U1& value); |
| 244 | [[nodiscard]] U32 SubgroupBallot(const U1& value); | 246 | [[nodiscard]] U32 SubgroupBallot(const U1& value); |
| 247 | [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 248 | const IR::U32& seg_mask); | ||
| 249 | [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 250 | const IR::U32& seg_mask); | ||
| 251 | [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||
| 252 | const IR::U32& seg_mask); | ||
| 253 | [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||
| 254 | const IR::U32& clamp, const IR::U32& seg_mask); | ||
| 245 | 255 | ||
| 246 | private: | 256 | private: |
| 247 | IR::Block::iterator insertion_point; | 257 | IR::Block::iterator insertion_point; |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index ba3968056..be8eb4d4c 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | |||
| 89 | case Opcode::GetCarryFromOp: | 89 | case Opcode::GetCarryFromOp: |
| 90 | case Opcode::GetOverflowFromOp: | 90 | case Opcode::GetOverflowFromOp: |
| 91 | case Opcode::GetSparseFromOp: | 91 | case Opcode::GetSparseFromOp: |
| 92 | case Opcode::GetInBoundsFromOp: | ||
| 92 | return true; | 93 | return true; |
| 93 | default: | 94 | default: |
| 94 | return false; | 95 | return false; |
| @@ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | |||
| 123 | case Opcode::GetSparseFromOp: | 124 | case Opcode::GetSparseFromOp: |
| 124 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | 125 | CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); |
| 125 | return associated_insts->sparse_inst; | 126 | return associated_insts->sparse_inst; |
| 127 | case Opcode::GetInBoundsFromOp: | ||
| 128 | CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 129 | return associated_insts->in_bounds_inst; | ||
| 126 | default: | 130 | default: |
| 127 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); | 131 | throw InvalidArgument("{} is not a pseudo-instruction", opcode); |
| 128 | } | 132 | } |
| @@ -262,6 +266,10 @@ void Inst::Use(const Value& value) { | |||
| 262 | AllocAssociatedInsts(assoc_inst); | 266 | AllocAssociatedInsts(assoc_inst); |
| 263 | SetPseudoInstruction(assoc_inst->sparse_inst, this); | 267 | SetPseudoInstruction(assoc_inst->sparse_inst, this); |
| 264 | break; | 268 | break; |
| 269 | case Opcode::GetInBoundsFromOp: | ||
| 270 | AllocAssociatedInsts(assoc_inst); | ||
| 271 | SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||
| 272 | break; | ||
| 265 | default: | 273 | default: |
| 266 | break; | 274 | break; |
| 267 | } | 275 | } |
| @@ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { | |||
| 289 | AllocAssociatedInsts(assoc_inst); | 297 | AllocAssociatedInsts(assoc_inst); |
| 290 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | 298 | RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); |
| 291 | break; | 299 | break; |
| 300 | case Opcode::GetInBoundsFromOp: | ||
| 301 | AllocAssociatedInsts(assoc_inst); | ||
| 302 | RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||
| 303 | break; | ||
| 292 | default: | 304 | default: |
| 293 | break; | 305 | break; |
| 294 | } | 306 | } |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index d5336c438..770bbd550 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h | |||
| @@ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | |||
| 134 | 134 | ||
| 135 | struct AssociatedInsts { | 135 | struct AssociatedInsts { |
| 136 | union { | 136 | union { |
| 137 | Inst* in_bounds_inst; | ||
| 137 | Inst* sparse_inst; | 138 | Inst* sparse_inst; |
| 138 | Inst* zero_inst{}; | 139 | Inst* zero_inst{}; |
| 139 | }; | 140 | }; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index dd17212a1..a2479c46a 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -159,6 +159,7 @@ OPCODE(GetSignFromOp, U1, Opaq | |||
| 159 | OPCODE(GetCarryFromOp, U1, Opaque, ) | 159 | OPCODE(GetCarryFromOp, U1, Opaque, ) |
| 160 | OPCODE(GetOverflowFromOp, U1, Opaque, ) | 160 | OPCODE(GetOverflowFromOp, U1, Opaque, ) |
| 161 | OPCODE(GetSparseFromOp, U1, Opaque, ) | 161 | OPCODE(GetSparseFromOp, U1, Opaque, ) |
| 162 | OPCODE(GetInBoundsFromOp, U1, Opaque, ) | ||
| 162 | 163 | ||
| 163 | // Floating-point operations | 164 | // Floating-point operations |
| 164 | OPCODE(FPAbs16, F16, F16, ) | 165 | OPCODE(FPAbs16, F16, F16, ) |
| @@ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod, F32x4, U32, | |||
| 363 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 364 | OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 364 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) | 365 | OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, ) |
| 365 | 366 | ||
| 366 | // Vote operations | 367 | // Warp operations |
| 367 | OPCODE(VoteAll, U1, U1, ) | 368 | OPCODE(VoteAll, U1, U1, ) |
| 368 | OPCODE(VoteAny, U1, U1, ) | 369 | OPCODE(VoteAny, U1, U1, ) |
| 369 | OPCODE(VoteEqual, U1, U1, ) | 370 | OPCODE(VoteEqual, U1, U1, ) |
| 370 | OPCODE(SubgroupBallot, U32, U1, ) | 371 | OPCODE(SubgroupBallot, U32, U1, ) |
| 372 | OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, ) | ||
| 373 | OPCODE(ShuffleUp, U32, U32, U32, U32, U32, ) | ||
| 374 | OPCODE(ShuffleDown, U32, U32, U32, U32, U32, ) | ||
| 375 | OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, ) | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp index 5469e445a..42fd42bb1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { | |||
| 53 | ISCADD(*this, insn, GetReg20(insn)); | 53 | ISCADD(*this, insn, GetReg20(insn)); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | void TranslatorVisitor::ISCADD_cbuf(u64) { | 56 | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { |
| 57 | throw NotImplementedException("ISCADD (cbuf)"); | 57 | ISCADD(*this, insn, GetCbuf(insn)); |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | void TranslatorVisitor::ISCADD_imm(u64 insn) { | 60 | void TranslatorVisitor::ISCADD_imm(u64 insn) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index c0e36a7e2..3ccd7b925 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { | |||
| 301 | ThrowNotImplemented(Opcode::SETLMEMBASE); | 301 | ThrowNotImplemented(Opcode::SETLMEMBASE); |
| 302 | } | 302 | } |
| 303 | 303 | ||
| 304 | void TranslatorVisitor::SHFL(u64) { | ||
| 305 | ThrowNotImplemented(Opcode::SHFL); | ||
| 306 | } | ||
| 307 | |||
| 308 | void TranslatorVisitor::SSY() { | 304 | void TranslatorVisitor::SSY() { |
| 309 | // SSY is a no-op | 305 | // SSY is a no-op |
| 310 | } | 306 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class ShuffleMode : u64 { | ||
| 14 | IDX, | ||
| 15 | UP, | ||
| 16 | DOWN, | ||
| 17 | BFLY, | ||
| 18 | }; | ||
| 19 | |||
| 20 | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||
| 21 | const IR::U32& index, const IR::U32& mask, | ||
| 22 | ShuffleMode shfl_op) { | ||
| 23 | const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||
| 24 | const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||
| 25 | switch (shfl_op) { | ||
| 26 | case ShuffleMode::IDX: | ||
| 27 | return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||
| 28 | case ShuffleMode::UP: | ||
| 29 | return ir.ShuffleUp(value, index, clamp, seg_mask); | ||
| 30 | case ShuffleMode::DOWN: | ||
| 31 | return ir.ShuffleDown(value, index, clamp, seg_mask); | ||
| 32 | case ShuffleMode::BFLY: | ||
| 33 | return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||
| 34 | default: | ||
| 35 | throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||
| 40 | union { | ||
| 41 | u64 insn; | ||
| 42 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 43 | BitField<8, 8, IR::Reg> src_reg; | ||
| 44 | BitField<30, 2, ShuffleMode> mode; | ||
| 45 | BitField<48, 3, IR::Pred> pred; | ||
| 46 | } const shfl{insn}; | ||
| 47 | |||
| 48 | const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||
| 49 | v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||
| 50 | v.X(shfl.dest_reg, result); | ||
| 51 | } | ||
| 52 | } // Anonymous namespace | ||
| 53 | |||
| 54 | void TranslatorVisitor::SHFL(u64 insn) { | ||
| 55 | union { | ||
| 56 | u64 insn; | ||
| 57 | BitField<20, 5, u64> src_a_imm; | ||
| 58 | BitField<28, 1, u64> src_a_flag; | ||
| 59 | BitField<29, 1, u64> src_b_flag; | ||
| 60 | BitField<34, 13, u64> src_b_imm; | ||
| 61 | } const flags{insn}; | ||
| 62 | const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||
| 63 | : GetReg20(insn)}; | ||
| 64 | const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||
| 65 | : GetReg39(insn)}; | ||
| 66 | Shuffle(*this, insn, src_a, src_b); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 32f276f3b..61cc314c7 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -307,6 +307,12 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 307 | case IR::Opcode::LocalInvocationId: | 307 | case IR::Opcode::LocalInvocationId: |
| 308 | info.uses_local_invocation_id = true; | 308 | info.uses_local_invocation_id = true; |
| 309 | break; | 309 | break; |
| 310 | case IR::Opcode::ShuffleIndex: | ||
| 311 | case IR::Opcode::ShuffleUp: | ||
| 312 | case IR::Opcode::ShuffleDown: | ||
| 313 | case IR::Opcode::ShuffleButterfly: | ||
| 314 | info.uses_subgroup_invocation_id = true; | ||
| 315 | break; | ||
| 310 | case IR::Opcode::GetCbufU8: | 316 | case IR::Opcode::GetCbufU8: |
| 311 | case IR::Opcode::GetCbufS8: | 317 | case IR::Opcode::GetCbufS8: |
| 312 | case IR::Opcode::GetCbufU16: | 318 | case IR::Opcode::GetCbufU16: |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3d9f04d1a..27e61a5f9 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -56,6 +56,7 @@ struct Info { | |||
| 56 | 56 | ||
| 57 | bool uses_workgroup_id{}; | 57 | bool uses_workgroup_id{}; |
| 58 | bool uses_local_invocation_id{}; | 58 | bool uses_local_invocation_id{}; |
| 59 | bool uses_subgroup_invocation_id{}; | ||
| 59 | 60 | ||
| 60 | std::array<bool, 32> loads_generics{}; | 61 | std::array<bool, 32> loads_generics{}; |
| 61 | bool loads_position{}; | 62 | bool loads_position{}; |