diff options
Diffstat (limited to 'src/shader_recompiler')
7 files changed, 229 insertions, 59 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 580063fa9..170db269a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -58,8 +58,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | |||
| 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; |
| 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; | 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; |
| 60 | const auto extraction{num_bits == 32 ? cbuf_cast | 60 | const auto extraction{num_bits == 32 ? cbuf_cast |
| 61 | : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, | 61 | : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, |
| 62 | bit_offset, num_bits)}; | 62 | bit_offset, num_bits)}; |
| 63 | if (!component_indexing_bug) { | 63 | if (!component_indexing_bug) { |
| 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; | 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; |
| 65 | ctx.Add("{}={};", ret, result); | 65 | ctx.Add("{}={};", ret, result); |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index a982dd8a2..cd285e2c8 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | 11 | ||
| 12 | namespace Shader::Backend::GLSL { | 12 | namespace Shader::Backend::GLSL { |
| 13 | namespace { | 13 | namespace { |
| 14 | constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"}; | ||
| 15 | |||
| 14 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | 16 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { |
| 15 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | 17 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; |
| 16 | if (!in_bounds) { | 18 | if (!in_bounds) { |
| @@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | |||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | 45 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); |
| 44 | SetInBoundsFlag(ctx, inst); | 46 | SetInBoundsFlag(ctx, inst); |
| 45 | } | 47 | } |
| 48 | |||
| 49 | std::string_view BallotIndex(EmitContext& ctx) { | ||
| 50 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 51 | return ".x"; | ||
| 52 | } | ||
| 53 | return "[gl_SubGroupInvocationARB>>5]"; | ||
| 54 | } | ||
| 55 | |||
| 56 | std::string GetMask(EmitContext& ctx, std::string_view mask) { | ||
| 57 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 58 | return fmt::format("uint(uvec2({}){})", mask, ballot_index); | ||
| 59 | } | ||
| 46 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 47 | 61 | ||
| 48 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | 62 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { |
| 49 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); | 63 | ctx.AddU32("{}={}&31u;", inst, THREAD_ID); |
| 50 | } | 64 | } |
| 51 | 65 | ||
| 52 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 66 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 53 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 67 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 54 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 68 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 55 | } else { | 69 | return; |
| 56 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 57 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 58 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 59 | } | 70 | } |
| 71 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 72 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 73 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 74 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 60 | } | 75 | } |
| 61 | 76 | ||
| 62 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 77 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 63 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 78 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 64 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | 79 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); |
| 65 | } else { | 80 | return; |
| 66 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 67 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 68 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 69 | } | 81 | } |
| 82 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 83 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 84 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 85 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 70 | } | 86 | } |
| 71 | 87 | ||
| 72 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 88 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 89 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 74 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 90 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 75 | } else { | 91 | return; |
| 76 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 77 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 78 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 79 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 80 | } | 92 | } |
| 93 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 94 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 95 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 96 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 97 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 81 | } | 98 | } |
| 82 | 99 | ||
| 83 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 100 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 101 | const auto ballot_index{BallotIndex(ctx)}; |
| 85 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | 102 | ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index); |
| 86 | } else { | ||
| 87 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); | ||
| 88 | } | ||
| 89 | } | 103 | } |
| 90 | 104 | ||
| 91 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | 105 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { |
| 92 | ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); | 106 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB")); |
| 93 | } | 107 | } |
| 94 | 108 | ||
| 95 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | 109 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { |
| 96 | ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); | 110 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB")); |
| 97 | } | 111 | } |
| 98 | 112 | ||
| 99 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | 113 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { |
| 100 | ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); | 114 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB")); |
| 101 | } | 115 | } |
| 102 | 116 | ||
| 103 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | 117 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { |
| 104 | ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); | 118 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB")); |
| 105 | } | 119 | } |
| 106 | 120 | ||
| 107 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | 121 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { |
| 108 | ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); | 122 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB")); |
| 109 | } | 123 | } |
| 110 | 124 | ||
| 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 125 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 112 | std::string_view index, std::string_view clamp, | 126 | std::string_view index, std::string_view clamp, std::string_view seg_mask) { |
| 113 | std::string_view segmentation_mask) { | ||
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | 127 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | 128 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask); |
| 116 | return; | 129 | return; |
| 117 | } | 130 | } |
| 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | 131 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 132 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | 133 | const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)}; |
| 121 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | 134 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 135 | |||
| 136 | const auto not_seg_mask{fmt::format("(~{})", seg_mask)}; | ||
| 137 | const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)}; | ||
| 138 | const auto max_thread_id{ | ||
| 139 | ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)}; | ||
| 122 | 140 | ||
| 123 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | 141 | const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)}; |
| 124 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | 142 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; |
| 125 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 143 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 126 | SetInBoundsFlag(ctx, inst); | 144 | SetInBoundsFlag(ctx, inst); |
| @@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 128 | } | 146 | } |
| 129 | 147 | ||
| 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | 148 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, |
| 131 | std::string_view clamp, std::string_view segmentation_mask) { | 149 | std::string_view clamp, std::string_view seg_mask) { |
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | 150 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | 151 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask); |
| 134 | return; | 152 | return; |
| 135 | } | 153 | } |
| 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 154 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 155 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | 156 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 157 | |||
| 158 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 159 | const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)}; | ||
| 139 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | 160 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); |
| 140 | SetInBoundsFlag(ctx, inst); | 161 | SetInBoundsFlag(ctx, inst); |
| 141 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 162 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
| 142 | } | 163 | } |
| 143 | 164 | ||
| 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 165 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 145 | std::string_view index, std::string_view clamp, | 166 | std::string_view index, std::string_view clamp, std::string_view seg_mask) { |
| 146 | std::string_view segmentation_mask) { | ||
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | 167 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | 168 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask); |
| 149 | return; | 169 | return; |
| 150 | } | 170 | } |
| 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 171 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 172 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | 173 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 174 | |||
| 175 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 176 | const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)}; | ||
| 154 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 177 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 155 | SetInBoundsFlag(ctx, inst); | 178 | SetInBoundsFlag(ctx, inst); |
| 156 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 179 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
| @@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 158 | 181 | ||
| 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 182 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 160 | std::string_view index, std::string_view clamp, | 183 | std::string_view index, std::string_view clamp, |
| 161 | std::string_view segmentation_mask) { | 184 | std::string_view seg_mask) { |
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | 185 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | 186 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask); |
| 164 | return; | 187 | return; |
| 165 | } | 188 | } |
| 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 189 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 190 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | 191 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 192 | |||
| 193 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 194 | const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)}; | ||
| 169 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 195 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 170 | SetInBoundsFlag(ctx, inst); | 196 | SetInBoundsFlag(ctx, inst); |
| 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 197 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | 15 | ||
| 16 | namespace Shader::Backend::SPIRV { | 16 | namespace Shader::Backend::SPIRV { |
| 17 | namespace { | 17 | namespace { |
| 18 | constexpr size_t NUM_FIXEDFNCTEXTURE = 10; | ||
| 19 | |||
| 18 | enum class Operation { | 20 | enum class Operation { |
| 19 | Increment, | 21 | Increment, |
| 20 | Decrement, | 22 | Decrement, |
| @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { | |||
| 427 | return pointer_type; | 429 | return pointer_type; |
| 428 | } | 430 | } |
| 429 | } | 431 | } |
| 432 | |||
| 433 | size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, | ||
| 434 | size_t start_offset) { | ||
| 435 | for (size_t location = start_offset; location < used_locations.size(); ++location) { | ||
| 436 | if (!used_locations.test(location)) { | ||
| 437 | return location; | ||
| 438 | } | ||
| 439 | } | ||
| 440 | throw RuntimeError("Unable to get an unused location for legacy attribute"); | ||
| 441 | } | ||
| 430 | } // Anonymous namespace | 442 | } // Anonymous namespace |
| 431 | 443 | ||
| 432 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | 444 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { |
| @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1227 | loads[IR::Attribute::TessellationEvaluationPointV]) { | 1239 | loads[IR::Attribute::TessellationEvaluationPointV]) { |
| 1228 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); | 1240 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); |
| 1229 | } | 1241 | } |
| 1242 | std::bitset<IR::NUM_GENERICS> used_locations{}; | ||
| 1230 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | 1243 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| 1231 | const AttributeType input_type{runtime_info.generic_input_types[index]}; | 1244 | const AttributeType input_type{runtime_info.generic_input_types[index]}; |
| 1232 | if (!runtime_info.previous_stage_stores.Generic(index)) { | 1245 | if (!runtime_info.previous_stage_stores.Generic(index)) { |
| @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1238 | if (input_type == AttributeType::Disabled) { | 1251 | if (input_type == AttributeType::Disabled) { |
| 1239 | continue; | 1252 | continue; |
| 1240 | } | 1253 | } |
| 1254 | used_locations.set(index); | ||
| 1241 | const Id type{GetAttributeType(*this, input_type)}; | 1255 | const Id type{GetAttributeType(*this, input_type)}; |
| 1242 | const Id id{DefineInput(*this, type, true)}; | 1256 | const Id id{DefineInput(*this, type, true)}; |
| 1243 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | 1257 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); |
| @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1263 | break; | 1277 | break; |
| 1264 | } | 1278 | } |
| 1265 | } | 1279 | } |
| 1280 | size_t previous_unused_location = 0; | ||
| 1281 | if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 1282 | const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1283 | previous_unused_location = location; | ||
| 1284 | used_locations.set(location); | ||
| 1285 | const Id id{DefineInput(*this, F32[4], true)}; | ||
| 1286 | Decorate(id, spv::Decoration::Location, location); | ||
| 1287 | input_front_color = id; | ||
| 1288 | } | ||
| 1289 | for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { | ||
| 1290 | if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 1291 | const size_t location = | ||
| 1292 | FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1293 | previous_unused_location = location; | ||
| 1294 | used_locations.set(location); | ||
| 1295 | const Id id{DefineInput(*this, F32[4], true)}; | ||
| 1296 | Decorate(id, spv::Decoration::Location, location); | ||
| 1297 | input_fixed_fnc_textures[index] = id; | ||
| 1298 | } | ||
| 1299 | } | ||
| 1266 | if (stage == Stage::TessellationEval) { | 1300 | if (stage == Stage::TessellationEval) { |
| 1267 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | 1301 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { |
| 1268 | if (!info.uses_patches[index]) { | 1302 | if (!info.uses_patches[index]) { |
| @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { | |||
| 1313 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, | 1347 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, |
| 1314 | spv::BuiltIn::ViewportMaskNV); | 1348 | spv::BuiltIn::ViewportMaskNV); |
| 1315 | } | 1349 | } |
| 1350 | std::bitset<IR::NUM_GENERICS> used_locations{}; | ||
| 1316 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | 1351 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| 1317 | if (info.stores.Generic(index)) { | 1352 | if (info.stores.Generic(index)) { |
| 1318 | DefineGenericOutput(*this, index, invocations); | 1353 | DefineGenericOutput(*this, index, invocations); |
| 1354 | used_locations.set(index); | ||
| 1355 | } | ||
| 1356 | } | ||
| 1357 | size_t previous_unused_location = 0; | ||
| 1358 | if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 1359 | const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1360 | previous_unused_location = location; | ||
| 1361 | used_locations.set(location); | ||
| 1362 | const Id id{DefineOutput(*this, F32[4], invocations)}; | ||
| 1363 | Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); | ||
| 1364 | output_front_color = id; | ||
| 1365 | } | ||
| 1366 | for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { | ||
| 1367 | if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 1368 | const size_t location = | ||
| 1369 | FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1370 | previous_unused_location = location; | ||
| 1371 | used_locations.set(location); | ||
| 1372 | const Id id{DefineOutput(*this, F32[4], invocations)}; | ||
| 1373 | Decorate(id, spv::Decoration::Location, location); | ||
| 1374 | output_fixed_fnc_textures[index] = id; | ||
| 1319 | } | 1375 | } |
| 1320 | } | 1376 | } |
| 1321 | switch (stage) { | 1377 | switch (stage) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -268,10 +268,14 @@ public: | |||
| 268 | Id write_global_func_u32x4{}; | 268 | Id write_global_func_u32x4{}; |
| 269 | 269 | ||
| 270 | Id input_position{}; | 270 | Id input_position{}; |
| 271 | Id input_front_color{}; | ||
| 272 | std::array<Id, 10> input_fixed_fnc_textures{}; | ||
| 271 | std::array<Id, 32> input_generics{}; | 273 | std::array<Id, 32> input_generics{}; |
| 272 | 274 | ||
| 273 | Id output_point_size{}; | 275 | Id output_point_size{}; |
| 274 | Id output_position{}; | 276 | Id output_position{}; |
| 277 | Id output_front_color{}; | ||
| 278 | std::array<Id, 10> output_fixed_fnc_textures{}; | ||
| 275 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | 279 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; |
| 276 | 280 | ||
| 277 | Id output_tess_level_outer{}; | 281 | Id output_tess_level_outer{}; |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 9e54a17ee..6f60c6574 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... | |||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | bool IsFixedFncTexture(IR::Attribute attribute) { | ||
| 47 | return attribute >= IR::Attribute::FixedFncTexture0S && | ||
| 48 | attribute <= IR::Attribute::FixedFncTexture9Q; | ||
| 49 | } | ||
| 50 | |||
| 51 | u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { | ||
| 52 | if (!IsFixedFncTexture(attribute)) { | ||
| 53 | throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); | ||
| 54 | } | ||
| 55 | return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; | ||
| 56 | } | ||
| 57 | |||
| 58 | u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { | ||
| 59 | if (!IsFixedFncTexture(attribute)) { | ||
| 60 | throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); | ||
| 61 | } | ||
| 62 | return static_cast<u32>(attribute) % 4u; | ||
| 63 | } | ||
| 64 | |||
| 46 | template <typename... Args> | 65 | template <typename... Args> |
| 47 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { | 66 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { |
| 48 | if (ctx.stage == Stage::TessellationControl) { | 67 | if (ctx.stage == Stage::TessellationControl) { |
| @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 74 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); | 93 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); |
| 75 | } | 94 | } |
| 76 | } | 95 | } |
| 96 | if (IsFixedFncTexture(attr)) { | ||
| 97 | const u32 index{FixedFncTextureAttributeIndex(attr)}; | ||
| 98 | const u32 element{FixedFncTextureAttributeElement(attr)}; | ||
| 99 | const Id element_id{ctx.Const(element)}; | ||
| 100 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], | ||
| 101 | element_id); | ||
| 102 | } | ||
| 77 | switch (attr) { | 103 | switch (attr) { |
| 78 | case IR::Attribute::PointSize: | 104 | case IR::Attribute::PointSize: |
| 79 | return ctx.output_point_size; | 105 | return ctx.output_point_size; |
| @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 85 | const Id element_id{ctx.Const(element)}; | 111 | const Id element_id{ctx.Const(element)}; |
| 86 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); | 112 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); |
| 87 | } | 113 | } |
| 114 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 115 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 116 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 117 | case IR::Attribute::ColorFrontDiffuseA: { | ||
| 118 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 119 | const Id element_id{ctx.Const(element)}; | ||
| 120 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); | ||
| 121 | } | ||
| 88 | case IR::Attribute::ClipDistance0: | 122 | case IR::Attribute::ClipDistance0: |
| 89 | case IR::Attribute::ClipDistance1: | 123 | case IR::Attribute::ClipDistance1: |
| 90 | case IR::Attribute::ClipDistance2: | 124 | case IR::Attribute::ClipDistance2: |
| @@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 307 | const Id value{ctx.OpLoad(type->id, pointer)}; | 341 | const Id value{ctx.OpLoad(type->id, pointer)}; |
| 308 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; | 342 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; |
| 309 | } | 343 | } |
| 344 | if (IsFixedFncTexture(attr)) { | ||
| 345 | const u32 index{FixedFncTextureAttributeIndex(attr)}; | ||
| 346 | const Id attr_id{ctx.input_fixed_fnc_textures[index]}; | ||
| 347 | const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; | ||
| 348 | return ctx.OpLoad(ctx.F32[1], attr_ptr); | ||
| 349 | } | ||
| 310 | switch (attr) { | 350 | switch (attr) { |
| 311 | case IR::Attribute::PrimitiveId: | 351 | case IR::Attribute::PrimitiveId: |
| 312 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); | 352 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); |
| @@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 316 | case IR::Attribute::PositionW: | 356 | case IR::Attribute::PositionW: |
| 317 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, | 357 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, |
| 318 | ctx.Const(element))); | 358 | ctx.Const(element))); |
| 359 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 360 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 361 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 362 | case IR::Attribute::ColorFrontDiffuseA: { | ||
| 363 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, | ||
| 364 | ctx.Const(element))); | ||
| 365 | } | ||
| 319 | case IR::Attribute::InstanceId: | 366 | case IR::Attribute::InstanceId: |
| 320 | if (ctx.profile.support_vertex_instance_id) { | 367 | if (ctx.profile.support_vertex_instance_id) { |
| 321 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); | 368 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); |
| @@ -430,7 +477,13 @@ void EmitSetSampleMask(EmitContext& ctx, Id value) { | |||
| 430 | } | 477 | } |
| 431 | 478 | ||
| 432 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | 479 | void EmitSetFragDepth(EmitContext& ctx, Id value) { |
| 433 | ctx.OpStore(ctx.frag_depth, value); | 480 | if (!ctx.runtime_info.convert_depth_mode) { |
| 481 | ctx.OpStore(ctx.frag_depth, value); | ||
| 482 | return; | ||
| 483 | } | ||
| 484 | const Id unit{ctx.Const(0.5f)}; | ||
| 485 | const Id new_depth{ctx.OpFma(ctx.F32[1], value, unit, unit)}; | ||
| 486 | ctx.OpStore(ctx.frag_depth, new_depth); | ||
| 434 | } | 487 | } |
| 435 | 488 | ||
| 436 | void EmitGetZFlag(EmitContext&) { | 489 | void EmitGetZFlag(EmitContext&) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 78b1e1ba7..cef52c56e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -7,8 +7,13 @@ | |||
| 7 | 7 | ||
| 8 | namespace Shader::Backend::SPIRV { | 8 | namespace Shader::Backend::SPIRV { |
| 9 | namespace { | 9 | namespace { |
| 10 | Id GetThreadId(EmitContext& ctx) { | ||
| 11 | return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); | ||
| 12 | } | ||
| 13 | |||
| 10 | Id WarpExtract(EmitContext& ctx, Id value) { | 14 | Id WarpExtract(EmitContext& ctx, Id value) { |
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 15 | const Id thread_id{GetThreadId(ctx)}; |
| 16 | const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | 17 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); |
| 13 | } | 18 | } |
| 14 | 19 | ||
| @@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | |||
| 48 | return ctx.OpSelect(ctx.U32[1], in_range, | 53 | return ctx.OpSelect(ctx.U32[1], in_range, |
| 49 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | 54 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); |
| 50 | } | 55 | } |
| 56 | |||
| 57 | Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { | ||
| 58 | const Id thirty_two{ctx.Const(32u)}; | ||
| 59 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; | ||
| 60 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 61 | return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 62 | } | ||
| 51 | } // Anonymous namespace | 63 | } // Anonymous namespace |
| 52 | 64 | ||
| 53 | Id EmitLaneId(EmitContext& ctx) { | 65 | Id EmitLaneId(EmitContext& ctx) { |
| 54 | const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 66 | const Id id{GetThreadId(ctx)}; |
| 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 67 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 56 | return id; | 68 | return id; |
| 57 | } | 69 | } |
| @@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { | |||
| 123 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 135 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 124 | Id segmentation_mask) { | 136 | Id segmentation_mask) { |
| 125 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | 137 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; |
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 138 | const Id thread_id{GetThreadId(ctx)}; |
| 139 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 140 | const Id thirty_two{ctx.Const(32u)}; | ||
| 141 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; | ||
| 142 | const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; | ||
| 143 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 144 | index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); | ||
| 145 | clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 146 | } | ||
| 127 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | 147 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; |
| 128 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | 148 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; |
| 129 | 149 | ||
| @@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla | |||
| 137 | 157 | ||
| 138 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 158 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 139 | Id segmentation_mask) { | 159 | Id segmentation_mask) { |
| 140 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 160 | const Id thread_id{GetThreadId(ctx)}; |
| 161 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 162 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 163 | } | ||
| 141 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 164 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 142 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | 165 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; |
| 143 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 166 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | |||
| 148 | 171 | ||
| 149 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 172 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 150 | Id segmentation_mask) { | 173 | Id segmentation_mask) { |
| 151 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 174 | const Id thread_id{GetThreadId(ctx)}; |
| 175 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 176 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 177 | } | ||
| 152 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 178 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 153 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | 179 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; |
| 154 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 180 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam | |||
| 159 | 185 | ||
| 160 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 186 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 161 | Id segmentation_mask) { | 187 | Id segmentation_mask) { |
| 162 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 188 | const Id thread_id{GetThreadId(ctx)}; |
| 189 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 190 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 191 | } | ||
| 163 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 192 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 164 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | 193 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; |
| 165 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 194 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index f3b12d04b..a12ddcc8f 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h | |||
| @@ -11,14 +11,16 @@ | |||
| 11 | namespace Shader { | 11 | namespace Shader { |
| 12 | 12 | ||
| 13 | template <typename T> | 13 | template <typename T> |
| 14 | requires std::is_destructible_v<T> class ObjectPool { | 14 | requires std::is_destructible_v<T> |
| 15 | class ObjectPool { | ||
| 15 | public: | 16 | public: |
| 16 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { | 17 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { |
| 17 | node = &chunks.emplace_back(new_chunk_size); | 18 | node = &chunks.emplace_back(new_chunk_size); |
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | template <typename... Args> | 21 | template <typename... Args> |
| 21 | requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { | 22 | requires std::is_constructible_v<T, Args...> |
| 23 | [[nodiscard]] T* Create(Args&&... args) { | ||
| 22 | return std::construct_at(Memory(), std::forward<Args>(args)...); | 24 | return std::construct_at(Memory(), std::forward<Args>(args)...); |
| 23 | } | 25 | } |
| 24 | 26 | ||