diff options
Diffstat (limited to 'src/shader_recompiler')
12 files changed, 369 insertions, 78 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 580063fa9..170db269a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -58,8 +58,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | |||
| 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | 58 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; |
| 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; | 59 | const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; |
| 60 | const auto extraction{num_bits == 32 ? cbuf_cast | 60 | const auto extraction{num_bits == 32 ? cbuf_cast |
| 61 | : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, | 61 | : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, |
| 62 | bit_offset, num_bits)}; | 62 | bit_offset, num_bits)}; |
| 63 | if (!component_indexing_bug) { | 63 | if (!component_indexing_bug) { |
| 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; | 64 | const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; |
| 65 | ctx.Add("{}={};", ret, result); | 65 | ctx.Add("{}={};", ret, result); |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index a982dd8a2..cd285e2c8 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | 11 | ||
| 12 | namespace Shader::Backend::GLSL { | 12 | namespace Shader::Backend::GLSL { |
| 13 | namespace { | 13 | namespace { |
| 14 | constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"}; | ||
| 15 | |||
| 14 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { | 16 | void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { |
| 15 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | 17 | IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; |
| 16 | if (!in_bounds) { | 18 | if (!in_bounds) { |
| @@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op, | |||
| 43 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); | 45 | ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); |
| 44 | SetInBoundsFlag(ctx, inst); | 46 | SetInBoundsFlag(ctx, inst); |
| 45 | } | 47 | } |
| 48 | |||
| 49 | std::string_view BallotIndex(EmitContext& ctx) { | ||
| 50 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 51 | return ".x"; | ||
| 52 | } | ||
| 53 | return "[gl_SubGroupInvocationARB>>5]"; | ||
| 54 | } | ||
| 55 | |||
| 56 | std::string GetMask(EmitContext& ctx, std::string_view mask) { | ||
| 57 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 58 | return fmt::format("uint(uvec2({}){})", mask, ballot_index); | ||
| 59 | } | ||
| 46 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 47 | 61 | ||
| 48 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { | 62 | void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { |
| 49 | ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); | 63 | ctx.AddU32("{}={}&31u;", inst, THREAD_ID); |
| 50 | } | 64 | } |
| 51 | 65 | ||
| 52 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 66 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 53 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 67 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 54 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 68 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 55 | } else { | 69 | return; |
| 56 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 57 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 58 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 59 | } | 70 | } |
| 71 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 72 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 73 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 74 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 60 | } | 75 | } |
| 61 | 76 | ||
| 62 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 77 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 63 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 78 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 64 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | 79 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); |
| 65 | } else { | 80 | return; |
| 66 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 67 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 68 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 69 | } | 81 | } |
| 82 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 83 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 84 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 85 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 70 | } | 86 | } |
| 71 | 87 | ||
| 72 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 88 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 73 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 89 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 74 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 90 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 75 | } else { | 91 | return; |
| 76 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")}; | ||
| 77 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)}; | ||
| 78 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 79 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 80 | } | 92 | } |
| 93 | const auto ballot_index{BallotIndex(ctx)}; | ||
| 94 | const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)}; | ||
| 95 | const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)}; | ||
| 96 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 97 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 81 | } | 98 | } |
| 82 | 99 | ||
| 83 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 100 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 84 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 101 | const auto ballot_index{BallotIndex(ctx)}; |
| 85 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | 102 | ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index); |
| 86 | } else { | ||
| 87 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred); | ||
| 88 | } | ||
| 89 | } | 103 | } |
| 90 | 104 | ||
| 91 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | 105 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { |
| 92 | ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); | 106 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB")); |
| 93 | } | 107 | } |
| 94 | 108 | ||
| 95 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { | 109 | void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { |
| 96 | ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); | 110 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB")); |
| 97 | } | 111 | } |
| 98 | 112 | ||
| 99 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { | 113 | void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { |
| 100 | ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); | 114 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB")); |
| 101 | } | 115 | } |
| 102 | 116 | ||
| 103 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { | 117 | void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { |
| 104 | ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); | 118 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB")); |
| 105 | } | 119 | } |
| 106 | 120 | ||
| 107 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { | 121 | void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { |
| 108 | ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); | 122 | ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB")); |
| 109 | } | 123 | } |
| 110 | 124 | ||
| 111 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 125 | void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 112 | std::string_view index, std::string_view clamp, | 126 | std::string_view index, std::string_view clamp, std::string_view seg_mask) { |
| 113 | std::string_view segmentation_mask) { | ||
| 114 | if (ctx.profile.support_gl_warp_intrinsics) { | 127 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 115 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); | 128 | UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask); |
| 116 | return; | 129 | return; |
| 117 | } | 130 | } |
| 118 | const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; | 131 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 119 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 132 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 120 | const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; | 133 | const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)}; |
| 121 | const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; | 134 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 135 | |||
| 136 | const auto not_seg_mask{fmt::format("(~{})", seg_mask)}; | ||
| 137 | const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)}; | ||
| 138 | const auto max_thread_id{ | ||
| 139 | ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)}; | ||
| 122 | 140 | ||
| 123 | const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; | 141 | const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)}; |
| 124 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; | 142 | const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; |
| 125 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 143 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 126 | SetInBoundsFlag(ctx, inst); | 144 | SetInBoundsFlag(ctx, inst); |
| @@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 128 | } | 146 | } |
| 129 | 147 | ||
| 130 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, | 148 | void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, |
| 131 | std::string_view clamp, std::string_view segmentation_mask) { | 149 | std::string_view clamp, std::string_view seg_mask) { |
| 132 | if (ctx.profile.support_gl_warp_intrinsics) { | 150 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 133 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); | 151 | UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask); |
| 134 | return; | 152 | return; |
| 135 | } | 153 | } |
| 136 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 154 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 137 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 155 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 138 | const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; | 156 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 157 | |||
| 158 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 159 | const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)}; | ||
| 139 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); | 160 | ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); |
| 140 | SetInBoundsFlag(ctx, inst); | 161 | SetInBoundsFlag(ctx, inst); |
| 141 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 162 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
| 142 | } | 163 | } |
| 143 | 164 | ||
| 144 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 165 | void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 145 | std::string_view index, std::string_view clamp, | 166 | std::string_view index, std::string_view clamp, std::string_view seg_mask) { |
| 146 | std::string_view segmentation_mask) { | ||
| 147 | if (ctx.profile.support_gl_warp_intrinsics) { | 167 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 148 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); | 168 | UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask); |
| 149 | return; | 169 | return; |
| 150 | } | 170 | } |
| 151 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 171 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 152 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 172 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 153 | const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; | 173 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 174 | |||
| 175 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 176 | const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)}; | ||
| 154 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 177 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 155 | SetInBoundsFlag(ctx, inst); | 178 | SetInBoundsFlag(ctx, inst); |
| 156 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 179 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
| @@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, | |||
| 158 | 181 | ||
| 159 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, | 182 | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, |
| 160 | std::string_view index, std::string_view clamp, | 183 | std::string_view index, std::string_view clamp, |
| 161 | std::string_view segmentation_mask) { | 184 | std::string_view seg_mask) { |
| 162 | if (ctx.profile.support_gl_warp_intrinsics) { | 185 | if (ctx.profile.support_gl_warp_intrinsics) { |
| 163 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); | 186 | UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask); |
| 164 | return; | 187 | return; |
| 165 | } | 188 | } |
| 166 | const auto thread_id{"gl_SubGroupInvocationARB"}; | 189 | const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest}; |
| 167 | const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; | 190 | const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"}; |
| 168 | const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; | 191 | const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)}; |
| 192 | |||
| 193 | const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)}; | ||
| 194 | const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)}; | ||
| 169 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); | 195 | ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); |
| 170 | SetInBoundsFlag(ctx, inst); | 196 | SetInBoundsFlag(ctx, inst); |
| 171 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); | 197 | ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2d29d8c14..2885e6799 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | 15 | ||
| 16 | namespace Shader::Backend::SPIRV { | 16 | namespace Shader::Backend::SPIRV { |
| 17 | namespace { | 17 | namespace { |
| 18 | constexpr size_t NUM_FIXEDFNCTEXTURE = 10; | ||
| 19 | |||
| 18 | enum class Operation { | 20 | enum class Operation { |
| 19 | Increment, | 21 | Increment, |
| 20 | Decrement, | 22 | Decrement, |
| @@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) { | |||
| 427 | return pointer_type; | 429 | return pointer_type; |
| 428 | } | 430 | } |
| 429 | } | 431 | } |
| 432 | |||
| 433 | size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations, | ||
| 434 | size_t start_offset) { | ||
| 435 | for (size_t location = start_offset; location < used_locations.size(); ++location) { | ||
| 436 | if (!used_locations.test(location)) { | ||
| 437 | return location; | ||
| 438 | } | ||
| 439 | } | ||
| 440 | throw RuntimeError("Unable to get an unused location for legacy attribute"); | ||
| 441 | } | ||
| 430 | } // Anonymous namespace | 442 | } // Anonymous namespace |
| 431 | 443 | ||
| 432 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { | 444 | void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { |
| @@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1227 | loads[IR::Attribute::TessellationEvaluationPointV]) { | 1239 | loads[IR::Attribute::TessellationEvaluationPointV]) { |
| 1228 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); | 1240 | tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); |
| 1229 | } | 1241 | } |
| 1242 | std::bitset<IR::NUM_GENERICS> used_locations{}; | ||
| 1230 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | 1243 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| 1231 | const AttributeType input_type{runtime_info.generic_input_types[index]}; | 1244 | const AttributeType input_type{runtime_info.generic_input_types[index]}; |
| 1232 | if (!runtime_info.previous_stage_stores.Generic(index)) { | 1245 | if (!runtime_info.previous_stage_stores.Generic(index)) { |
| @@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1238 | if (input_type == AttributeType::Disabled) { | 1251 | if (input_type == AttributeType::Disabled) { |
| 1239 | continue; | 1252 | continue; |
| 1240 | } | 1253 | } |
| 1254 | used_locations.set(index); | ||
| 1241 | const Id type{GetAttributeType(*this, input_type)}; | 1255 | const Id type{GetAttributeType(*this, input_type)}; |
| 1242 | const Id id{DefineInput(*this, type, true)}; | 1256 | const Id id{DefineInput(*this, type, true)}; |
| 1243 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); | 1257 | Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); |
| @@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) { | |||
| 1263 | break; | 1277 | break; |
| 1264 | } | 1278 | } |
| 1265 | } | 1279 | } |
| 1280 | size_t previous_unused_location = 0; | ||
| 1281 | if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 1282 | const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1283 | previous_unused_location = location; | ||
| 1284 | used_locations.set(location); | ||
| 1285 | const Id id{DefineInput(*this, F32[4], true)}; | ||
| 1286 | Decorate(id, spv::Decoration::Location, location); | ||
| 1287 | input_front_color = id; | ||
| 1288 | } | ||
| 1289 | for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { | ||
| 1290 | if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 1291 | const size_t location = | ||
| 1292 | FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1293 | previous_unused_location = location; | ||
| 1294 | used_locations.set(location); | ||
| 1295 | const Id id{DefineInput(*this, F32[4], true)}; | ||
| 1296 | Decorate(id, spv::Decoration::Location, location); | ||
| 1297 | input_fixed_fnc_textures[index] = id; | ||
| 1298 | } | ||
| 1299 | } | ||
| 1266 | if (stage == Stage::TessellationEval) { | 1300 | if (stage == Stage::TessellationEval) { |
| 1267 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | 1301 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { |
| 1268 | if (!info.uses_patches[index]) { | 1302 | if (!info.uses_patches[index]) { |
| @@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) { | |||
| 1313 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, | 1347 | viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, |
| 1314 | spv::BuiltIn::ViewportMaskNV); | 1348 | spv::BuiltIn::ViewportMaskNV); |
| 1315 | } | 1349 | } |
| 1350 | std::bitset<IR::NUM_GENERICS> used_locations{}; | ||
| 1316 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | 1351 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { |
| 1317 | if (info.stores.Generic(index)) { | 1352 | if (info.stores.Generic(index)) { |
| 1318 | DefineGenericOutput(*this, index, invocations); | 1353 | DefineGenericOutput(*this, index, invocations); |
| 1354 | used_locations.set(index); | ||
| 1355 | } | ||
| 1356 | } | ||
| 1357 | size_t previous_unused_location = 0; | ||
| 1358 | if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 1359 | const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1360 | previous_unused_location = location; | ||
| 1361 | used_locations.set(location); | ||
| 1362 | const Id id{DefineOutput(*this, F32[4], invocations)}; | ||
| 1363 | Decorate(id, spv::Decoration::Location, static_cast<u32>(location)); | ||
| 1364 | output_front_color = id; | ||
| 1365 | } | ||
| 1366 | for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) { | ||
| 1367 | if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) { | ||
| 1368 | const size_t location = | ||
| 1369 | FindNextUnusedLocation(used_locations, previous_unused_location); | ||
| 1370 | previous_unused_location = location; | ||
| 1371 | used_locations.set(location); | ||
| 1372 | const Id id{DefineOutput(*this, F32[4], invocations)}; | ||
| 1373 | Decorate(id, spv::Decoration::Location, location); | ||
| 1374 | output_fixed_fnc_textures[index] = id; | ||
| 1319 | } | 1375 | } |
| 1320 | } | 1376 | } |
| 1321 | switch (stage) { | 1377 | switch (stage) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e277bc358..847d0c0e6 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -268,10 +268,14 @@ public: | |||
| 268 | Id write_global_func_u32x4{}; | 268 | Id write_global_func_u32x4{}; |
| 269 | 269 | ||
| 270 | Id input_position{}; | 270 | Id input_position{}; |
| 271 | Id input_front_color{}; | ||
| 272 | std::array<Id, 10> input_fixed_fnc_textures{}; | ||
| 271 | std::array<Id, 32> input_generics{}; | 273 | std::array<Id, 32> input_generics{}; |
| 272 | 274 | ||
| 273 | Id output_point_size{}; | 275 | Id output_point_size{}; |
| 274 | Id output_position{}; | 276 | Id output_position{}; |
| 277 | Id output_front_color{}; | ||
| 278 | std::array<Id, 10> output_fixed_fnc_textures{}; | ||
| 275 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; | 279 | std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; |
| 276 | 280 | ||
| 277 | Id output_tess_level_outer{}; | 281 | Id output_tess_level_outer{}; |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index fb8c02a77..6f60c6574 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... | |||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | bool IsFixedFncTexture(IR::Attribute attribute) { | ||
| 47 | return attribute >= IR::Attribute::FixedFncTexture0S && | ||
| 48 | attribute <= IR::Attribute::FixedFncTexture9Q; | ||
| 49 | } | ||
| 50 | |||
| 51 | u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) { | ||
| 52 | if (!IsFixedFncTexture(attribute)) { | ||
| 53 | throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); | ||
| 54 | } | ||
| 55 | return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u; | ||
| 56 | } | ||
| 57 | |||
| 58 | u32 FixedFncTextureAttributeElement(IR::Attribute attribute) { | ||
| 59 | if (!IsFixedFncTexture(attribute)) { | ||
| 60 | throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute); | ||
| 61 | } | ||
| 62 | return static_cast<u32>(attribute) % 4u; | ||
| 63 | } | ||
| 64 | |||
| 46 | template <typename... Args> | 65 | template <typename... Args> |
| 47 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { | 66 | Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { |
| 48 | if (ctx.stage == Stage::TessellationControl) { | 67 | if (ctx.stage == Stage::TessellationControl) { |
| @@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 74 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); | 93 | return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); |
| 75 | } | 94 | } |
| 76 | } | 95 | } |
| 96 | if (IsFixedFncTexture(attr)) { | ||
| 97 | const u32 index{FixedFncTextureAttributeIndex(attr)}; | ||
| 98 | const u32 element{FixedFncTextureAttributeElement(attr)}; | ||
| 99 | const Id element_id{ctx.Const(element)}; | ||
| 100 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index], | ||
| 101 | element_id); | ||
| 102 | } | ||
| 77 | switch (attr) { | 103 | switch (attr) { |
| 78 | case IR::Attribute::PointSize: | 104 | case IR::Attribute::PointSize: |
| 79 | return ctx.output_point_size; | 105 | return ctx.output_point_size; |
| @@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 85 | const Id element_id{ctx.Const(element)}; | 111 | const Id element_id{ctx.Const(element)}; |
| 86 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); | 112 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); |
| 87 | } | 113 | } |
| 114 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 115 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 116 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 117 | case IR::Attribute::ColorFrontDiffuseA: { | ||
| 118 | const u32 element{static_cast<u32>(attr) % 4}; | ||
| 119 | const Id element_id{ctx.Const(element)}; | ||
| 120 | return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id); | ||
| 121 | } | ||
| 88 | case IR::Attribute::ClipDistance0: | 122 | case IR::Attribute::ClipDistance0: |
| 89 | case IR::Attribute::ClipDistance1: | 123 | case IR::Attribute::ClipDistance1: |
| 90 | case IR::Attribute::ClipDistance2: | 124 | case IR::Attribute::ClipDistance2: |
| @@ -298,19 +332,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 298 | if (IR::IsGeneric(attr)) { | 332 | if (IR::IsGeneric(attr)) { |
| 299 | const u32 index{IR::GenericAttributeIndex(attr)}; | 333 | const u32 index{IR::GenericAttributeIndex(attr)}; |
| 300 | const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; | 334 | const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; |
| 301 | if (!type) { | 335 | if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) { |
| 302 | // Attribute is disabled | 336 | // Attribute is disabled or varying component is not written |
| 303 | return ctx.Const(element == 3 ? 1.0f : 0.0f); | 337 | return ctx.Const(element == 3 ? 1.0f : 0.0f); |
| 304 | } | 338 | } |
| 305 | if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) { | ||
| 306 | // Varying component is not written | ||
| 307 | return ctx.Const(type && element == 3 ? 1.0f : 0.0f); | ||
| 308 | } | ||
| 309 | const Id generic_id{ctx.input_generics.at(index)}; | 339 | const Id generic_id{ctx.input_generics.at(index)}; |
| 310 | const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; | 340 | const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; |
| 311 | const Id value{ctx.OpLoad(type->id, pointer)}; | 341 | const Id value{ctx.OpLoad(type->id, pointer)}; |
| 312 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; | 342 | return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; |
| 313 | } | 343 | } |
| 344 | if (IsFixedFncTexture(attr)) { | ||
| 345 | const u32 index{FixedFncTextureAttributeIndex(attr)}; | ||
| 346 | const Id attr_id{ctx.input_fixed_fnc_textures[index]}; | ||
| 347 | const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))}; | ||
| 348 | return ctx.OpLoad(ctx.F32[1], attr_ptr); | ||
| 349 | } | ||
| 314 | switch (attr) { | 350 | switch (attr) { |
| 315 | case IR::Attribute::PrimitiveId: | 351 | case IR::Attribute::PrimitiveId: |
| 316 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); | 352 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); |
| @@ -320,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 320 | case IR::Attribute::PositionW: | 356 | case IR::Attribute::PositionW: |
| 321 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, | 357 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, |
| 322 | ctx.Const(element))); | 358 | ctx.Const(element))); |
| 359 | case IR::Attribute::ColorFrontDiffuseR: | ||
| 360 | case IR::Attribute::ColorFrontDiffuseG: | ||
| 361 | case IR::Attribute::ColorFrontDiffuseB: | ||
| 362 | case IR::Attribute::ColorFrontDiffuseA: { | ||
| 363 | return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color, | ||
| 364 | ctx.Const(element))); | ||
| 365 | } | ||
| 323 | case IR::Attribute::InstanceId: | 366 | case IR::Attribute::InstanceId: |
| 324 | if (ctx.profile.support_vertex_instance_id) { | 367 | if (ctx.profile.support_vertex_instance_id) { |
| 325 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); | 368 | return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); |
| @@ -337,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | |||
| 337 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | 380 | return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); |
| 338 | } | 381 | } |
| 339 | case IR::Attribute::FrontFace: | 382 | case IR::Attribute::FrontFace: |
| 340 | return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | 383 | return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), |
| 341 | ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); | 384 | ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), |
| 385 | ctx.f32_zero_value); | ||
| 342 | case IR::Attribute::PointSpriteS: | 386 | case IR::Attribute::PointSpriteS: |
| 343 | return ctx.OpLoad(ctx.F32[1], | 387 | return ctx.OpLoad(ctx.F32[1], |
| 344 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); | 388 | ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); |
| @@ -433,7 +477,13 @@ void EmitSetSampleMask(EmitContext& ctx, Id value) { | |||
| 433 | } | 477 | } |
| 434 | 478 | ||
| 435 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | 479 | void EmitSetFragDepth(EmitContext& ctx, Id value) { |
| 436 | ctx.OpStore(ctx.frag_depth, value); | 480 | if (!ctx.runtime_info.convert_depth_mode) { |
| 481 | ctx.OpStore(ctx.frag_depth, value); | ||
| 482 | return; | ||
| 483 | } | ||
| 484 | const Id unit{ctx.Const(0.5f)}; | ||
| 485 | const Id new_depth{ctx.OpFma(ctx.F32[1], value, unit, unit)}; | ||
| 486 | ctx.OpStore(ctx.frag_depth, new_depth); | ||
| 437 | } | 487 | } |
| 438 | 488 | ||
| 439 | void EmitGetZFlag(EmitContext&) { | 489 | void EmitGetZFlag(EmitContext&) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp index 78b1e1ba7..cef52c56e 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | |||
| @@ -7,8 +7,13 @@ | |||
| 7 | 7 | ||
| 8 | namespace Shader::Backend::SPIRV { | 8 | namespace Shader::Backend::SPIRV { |
| 9 | namespace { | 9 | namespace { |
| 10 | Id GetThreadId(EmitContext& ctx) { | ||
| 11 | return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id); | ||
| 12 | } | ||
| 13 | |||
| 10 | Id WarpExtract(EmitContext& ctx, Id value) { | 14 | Id WarpExtract(EmitContext& ctx, Id value) { |
| 11 | const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 15 | const Id thread_id{GetThreadId(ctx)}; |
| 16 | const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; | ||
| 12 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | 17 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); |
| 13 | } | 18 | } |
| 14 | 19 | ||
| @@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | |||
| 48 | return ctx.OpSelect(ctx.U32[1], in_range, | 53 | return ctx.OpSelect(ctx.U32[1], in_range, |
| 49 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | 54 | ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); |
| 50 | } | 55 | } |
| 56 | |||
| 57 | Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) { | ||
| 58 | const Id thirty_two{ctx.Const(32u)}; | ||
| 59 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)}; | ||
| 60 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 61 | return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 62 | } | ||
| 51 | } // Anonymous namespace | 63 | } // Anonymous namespace |
| 52 | 64 | ||
| 53 | Id EmitLaneId(EmitContext& ctx) { | 65 | Id EmitLaneId(EmitContext& ctx) { |
| 54 | const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 66 | const Id id{GetThreadId(ctx)}; |
| 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { | 67 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 56 | return id; | 68 | return id; |
| 57 | } | 69 | } |
| @@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) { | |||
| 123 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 135 | Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 124 | Id segmentation_mask) { | 136 | Id segmentation_mask) { |
| 125 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | 137 | const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; |
| 126 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 138 | const Id thread_id{GetThreadId(ctx)}; |
| 139 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 140 | const Id thirty_two{ctx.Const(32u)}; | ||
| 141 | const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)}; | ||
| 142 | const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)}; | ||
| 143 | const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)}; | ||
| 144 | index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index); | ||
| 145 | clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp); | ||
| 146 | } | ||
| 127 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | 147 | const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; |
| 128 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | 148 | const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; |
| 129 | 149 | ||
| @@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla | |||
| 137 | 157 | ||
| 138 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 158 | Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 139 | Id segmentation_mask) { | 159 | Id segmentation_mask) { |
| 140 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 160 | const Id thread_id{GetThreadId(ctx)}; |
| 161 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 162 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 163 | } | ||
| 141 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 164 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 142 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | 165 | const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; |
| 143 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 166 | const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | |||
| 148 | 171 | ||
| 149 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 172 | Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 150 | Id segmentation_mask) { | 173 | Id segmentation_mask) { |
| 151 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 174 | const Id thread_id{GetThreadId(ctx)}; |
| 175 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 176 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 177 | } | ||
| 152 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 178 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 153 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | 179 | const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; |
| 154 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 180 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
| @@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam | |||
| 159 | 185 | ||
| 160 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | 186 | Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, |
| 161 | Id segmentation_mask) { | 187 | Id segmentation_mask) { |
| 162 | const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | 188 | const Id thread_id{GetThreadId(ctx)}; |
| 189 | if (ctx.profile.warp_size_potentially_larger_than_guest) { | ||
| 190 | clamp = GetUpperClamp(ctx, thread_id, clamp); | ||
| 191 | } | ||
| 163 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | 192 | const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; |
| 164 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | 193 | const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; |
| 165 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | 194 | const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; |
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp index 8b3e0a15c..69eeaa3e6 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "shader_recompiler/frontend/maxwell/decode.h" | 20 | #include "shader_recompiler/frontend/maxwell/decode.h" |
| 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" |
| 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" |
| 23 | #include "shader_recompiler/host_translate_info.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | 24 | #include "shader_recompiler/object_pool.h" |
| 24 | 25 | ||
| 25 | namespace Shader::Maxwell { | 26 | namespace Shader::Maxwell { |
| @@ -652,7 +653,7 @@ class TranslatePass { | |||
| 652 | public: | 653 | public: |
| 653 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | 654 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, |
| 654 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | 655 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, |
| 655 | IR::AbstractSyntaxList& syntax_list_) | 656 | IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) |
| 656 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | 657 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, |
| 657 | syntax_list{syntax_list_} { | 658 | syntax_list{syntax_list_} { |
| 658 | Visit(root_stmt, nullptr, nullptr); | 659 | Visit(root_stmt, nullptr, nullptr); |
| @@ -660,6 +661,9 @@ public: | |||
| 660 | IR::Block& first_block{*syntax_list.front().data.block}; | 661 | IR::Block& first_block{*syntax_list.front().data.block}; |
| 661 | IR::IREmitter ir(first_block, first_block.begin()); | 662 | IR::IREmitter ir(first_block, first_block.begin()); |
| 662 | ir.Prologue(); | 663 | ir.Prologue(); |
| 664 | if (uses_demote_to_helper && host_info.needs_demote_reorder) { | ||
| 665 | DemoteCombinationPass(); | ||
| 666 | } | ||
| 663 | } | 667 | } |
| 664 | 668 | ||
| 665 | private: | 669 | private: |
| @@ -809,7 +813,14 @@ private: | |||
| 809 | } | 813 | } |
| 810 | case StatementType::Return: { | 814 | case StatementType::Return: { |
| 811 | ensure_block(); | 815 | ensure_block(); |
| 812 | IR::IREmitter{*current_block}.Epilogue(); | 816 | IR::Block* return_block{block_pool.Create(inst_pool)}; |
| 817 | IR::IREmitter{*return_block}.Epilogue(); | ||
| 818 | current_block->AddBranch(return_block); | ||
| 819 | |||
| 820 | auto& merge{syntax_list.emplace_back()}; | ||
| 821 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 822 | merge.data.block = return_block; | ||
| 823 | |||
| 813 | current_block = nullptr; | 824 | current_block = nullptr; |
| 814 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | 825 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; |
| 815 | break; | 826 | break; |
| @@ -824,6 +835,7 @@ private: | |||
| 824 | auto& merge{syntax_list.emplace_back()}; | 835 | auto& merge{syntax_list.emplace_back()}; |
| 825 | merge.type = IR::AbstractSyntaxNode::Type::Block; | 836 | merge.type = IR::AbstractSyntaxNode::Type::Block; |
| 826 | merge.data.block = demote_block; | 837 | merge.data.block = demote_block; |
| 838 | uses_demote_to_helper = true; | ||
| 827 | break; | 839 | break; |
| 828 | } | 840 | } |
| 829 | case StatementType::Unreachable: { | 841 | case StatementType::Unreachable: { |
| @@ -855,11 +867,117 @@ private: | |||
| 855 | return block_pool.Create(inst_pool); | 867 | return block_pool.Create(inst_pool); |
| 856 | } | 868 | } |
| 857 | 869 | ||
| 870 | void DemoteCombinationPass() { | ||
| 871 | using Type = IR::AbstractSyntaxNode::Type; | ||
| 872 | std::vector<IR::Block*> demote_blocks; | ||
| 873 | std::vector<IR::U1> demote_conds; | ||
| 874 | u32 num_epilogues{}; | ||
| 875 | u32 branch_depth{}; | ||
| 876 | for (const IR::AbstractSyntaxNode& node : syntax_list) { | ||
| 877 | if (node.type == Type::If) { | ||
| 878 | ++branch_depth; | ||
| 879 | } | ||
| 880 | if (node.type == Type::EndIf) { | ||
| 881 | --branch_depth; | ||
| 882 | } | ||
| 883 | if (node.type != Type::Block) { | ||
| 884 | continue; | ||
| 885 | } | ||
| 886 | if (branch_depth > 1) { | ||
| 887 | // Skip reordering nested demote branches. | ||
| 888 | continue; | ||
| 889 | } | ||
| 890 | for (const IR::Inst& inst : node.data.block->Instructions()) { | ||
| 891 | const IR::Opcode op{inst.GetOpcode()}; | ||
| 892 | if (op == IR::Opcode::DemoteToHelperInvocation) { | ||
| 893 | demote_blocks.push_back(node.data.block); | ||
| 894 | break; | ||
| 895 | } | ||
| 896 | if (op == IR::Opcode::Epilogue) { | ||
| 897 | ++num_epilogues; | ||
| 898 | } | ||
| 899 | } | ||
| 900 | } | ||
| 901 | if (demote_blocks.size() == 0) { | ||
| 902 | return; | ||
| 903 | } | ||
| 904 | if (num_epilogues > 1) { | ||
| 905 | LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); | ||
| 906 | return; | ||
| 907 | } | ||
| 908 | s64 last_iterator_offset{}; | ||
| 909 | auto& asl{syntax_list}; | ||
| 910 | for (const IR::Block* demote_block : demote_blocks) { | ||
| 911 | const auto start_it{asl.begin() + last_iterator_offset}; | ||
| 912 | auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 913 | return asn.type == Type::If && asn.data.if_node.body == demote_block; | ||
| 914 | })}; | ||
| 915 | if (asl_it == asl.end()) { | ||
| 916 | // Demote without a conditional branch. | ||
| 917 | // No need to proceed since all fragment instances will be demoted regardless. | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | const IR::Block* const end_if = asl_it->data.if_node.merge; | ||
| 921 | demote_conds.push_back(asl_it->data.if_node.cond); | ||
| 922 | last_iterator_offset = std::distance(asl.begin(), asl_it); | ||
| 923 | |||
| 924 | asl_it = asl.erase(asl_it); | ||
| 925 | asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 926 | return asn.type == Type::Block && asn.data.block == demote_block; | ||
| 927 | }); | ||
| 928 | |||
| 929 | asl_it = asl.erase(asl_it); | ||
| 930 | asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||
| 931 | return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; | ||
| 932 | }); | ||
| 933 | asl_it = asl.erase(asl_it); | ||
| 934 | } | ||
| 935 | const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { | ||
| 936 | if (asn.type != Type::Block) { | ||
| 937 | return false; | ||
| 938 | } | ||
| 939 | for (const auto& inst : asn.data.block->Instructions()) { | ||
| 940 | if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||
| 941 | return true; | ||
| 942 | } | ||
| 943 | } | ||
| 944 | return false; | ||
| 945 | }}; | ||
| 946 | const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; | ||
| 947 | const auto return_block_it{(reverse_it + 1).base()}; | ||
| 948 | |||
| 949 | IR::IREmitter ir{*(return_block_it - 1)->data.block}; | ||
| 950 | IR::U1 cond(IR::Value(false)); | ||
| 951 | for (const auto& demote_cond : demote_conds) { | ||
| 952 | cond = ir.LogicalOr(cond, demote_cond); | ||
| 953 | } | ||
| 954 | cond.Inst()->DestructiveAddUsage(1); | ||
| 955 | |||
| 956 | IR::AbstractSyntaxNode demote_if_node{}; | ||
| 957 | demote_if_node.type = Type::If; | ||
| 958 | demote_if_node.data.if_node.cond = cond; | ||
| 959 | demote_if_node.data.if_node.body = demote_blocks[0]; | ||
| 960 | demote_if_node.data.if_node.merge = return_block_it->data.block; | ||
| 961 | |||
| 962 | IR::AbstractSyntaxNode demote_node{}; | ||
| 963 | demote_node.type = Type::Block; | ||
| 964 | demote_node.data.block = demote_blocks[0]; | ||
| 965 | |||
| 966 | IR::AbstractSyntaxNode demote_endif_node{}; | ||
| 967 | demote_endif_node.type = Type::EndIf; | ||
| 968 | demote_endif_node.data.end_if.merge = return_block_it->data.block; | ||
| 969 | |||
| 970 | asl.insert(return_block_it, demote_endif_node); | ||
| 971 | asl.insert(return_block_it, demote_node); | ||
| 972 | asl.insert(return_block_it, demote_if_node); | ||
| 973 | } | ||
| 974 | |||
| 858 | ObjectPool<Statement>& stmt_pool; | 975 | ObjectPool<Statement>& stmt_pool; |
| 859 | ObjectPool<IR::Inst>& inst_pool; | 976 | ObjectPool<IR::Inst>& inst_pool; |
| 860 | ObjectPool<IR::Block>& block_pool; | 977 | ObjectPool<IR::Block>& block_pool; |
| 861 | Environment& env; | 978 | Environment& env; |
| 862 | IR::AbstractSyntaxList& syntax_list; | 979 | IR::AbstractSyntaxList& syntax_list; |
| 980 | bool uses_demote_to_helper{}; | ||
| 863 | 981 | ||
| 864 | // TODO: C++20 Remove this when all compilers support constexpr std::vector | 982 | // TODO: C++20 Remove this when all compilers support constexpr std::vector |
| 865 | #if __cpp_lib_constexpr_vector >= 201907 | 983 | #if __cpp_lib_constexpr_vector >= 201907 |
| @@ -871,12 +989,13 @@ private: | |||
| 871 | } // Anonymous namespace | 989 | } // Anonymous namespace |
| 872 | 990 | ||
| 873 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 991 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| 874 | Environment& env, Flow::CFG& cfg) { | 992 | Environment& env, Flow::CFG& cfg, |
| 993 | const HostTranslateInfo& host_info) { | ||
| 875 | ObjectPool<Statement> stmt_pool{64}; | 994 | ObjectPool<Statement> stmt_pool{64}; |
| 876 | GotoPass goto_pass{cfg, stmt_pool}; | 995 | GotoPass goto_pass{cfg, stmt_pool}; |
| 877 | Statement& root{goto_pass.RootStatement()}; | 996 | Statement& root{goto_pass.RootStatement()}; |
| 878 | IR::AbstractSyntaxList syntax_list; | 997 | IR::AbstractSyntaxList syntax_list; |
| 879 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | 998 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; |
| 880 | return syntax_list; | 999 | return syntax_list; |
| 881 | } | 1000 | } |
| 882 | 1001 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h index 88b083649..e38158da3 100644 --- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h | |||
| @@ -11,10 +11,13 @@ | |||
| 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 12 | #include "shader_recompiler/object_pool.h" | 12 | #include "shader_recompiler/object_pool.h" |
| 13 | 13 | ||
| 14 | namespace Shader::Maxwell { | 14 | namespace Shader { |
| 15 | struct HostTranslateInfo; | ||
| 16 | namespace Maxwell { | ||
| 15 | 17 | ||
| 16 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | 18 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, |
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | 19 | ObjectPool<IR::Block>& block_pool, Environment& env, |
| 18 | Flow::CFG& cfg); | 20 | Flow::CFG& cfg, const HostTranslateInfo& host_info); |
| 19 | 21 | ||
| 20 | } // namespace Shader::Maxwell | 22 | } // namespace Maxwell |
| 23 | } // namespace Shader | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index c067d459c..012d55357 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { | |||
| 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { |
| 132 | IR::Program program; | 132 | IR::Program program; |
| 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); |
| 134 | program.blocks = GenerateBlocks(program.syntax_list); | 134 | program.blocks = GenerateBlocks(program.syntax_list); |
| 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); | 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); |
| 136 | program.stage = env.ShaderStage(); | 136 | program.stage = env.ShaderStage(); |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h index 94a584219..96468b2e7 100644 --- a/src/shader_recompiler/host_translate_info.h +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -11,8 +11,9 @@ namespace Shader { | |||
| 11 | 11 | ||
/// Misc information about the host
struct HostTranslateInfo {
    /// True when the device supports 16-bit floats
    bool support_float16 = false;
    /// True when the device supports 64-bit integers
    bool support_int64 = false;
    /// True when the device needs DemoteToHelperInvocation reordered
    bool needs_demote_reorder = false;
};
| 17 | 18 | ||
| 18 | } // namespace Shader | 19 | } // namespace Shader |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index 44ad10d43..225c238fb 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -492,7 +492,8 @@ void TexturePass(Environment& env, IR::Program& program) { | |||
| 492 | const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; | 492 | const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; |
| 493 | IR::IREmitter ir{*texture_inst.block, insert_point}; | 493 | IR::IREmitter ir{*texture_inst.block, insert_point}; |
| 494 | const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; | 494 | const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; |
| 495 | inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); | 495 | inst->SetArg(0, ir.SMin(ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift), |
| 496 | ir.Imm32(DESCRIPTOR_SIZE - 1))); | ||
| 496 | } else { | 497 | } else { |
| 497 | inst->SetArg(0, IR::Value{}); | 498 | inst->SetArg(0, IR::Value{}); |
| 498 | } | 499 | } |
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h index f3b12d04b..a12ddcc8f 100644 --- a/src/shader_recompiler/object_pool.h +++ b/src/shader_recompiler/object_pool.h | |||
| @@ -11,14 +11,16 @@ | |||
| 11 | namespace Shader { | 11 | namespace Shader { |
| 12 | 12 | ||
| 13 | template <typename T> | 13 | template <typename T> |
| 14 | requires std::is_destructible_v<T> class ObjectPool { | 14 | requires std::is_destructible_v<T> |
| 15 | class ObjectPool { | ||
| 15 | public: | 16 | public: |
| 16 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { | 17 | explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { |
| 17 | node = &chunks.emplace_back(new_chunk_size); | 18 | node = &chunks.emplace_back(new_chunk_size); |
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | template <typename... Args> | 21 | template <typename... Args> |
| 21 | requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { | 22 | requires std::is_constructible_v<T, Args...> |
| 23 | [[nodiscard]] T* Create(Args&&... args) { | ||
| 22 | return std::construct_at(Memory(), std::forward<Args>(args)...); | 24 | return std::construct_at(Memory(), std::forward<Args>(args)...); |
| 23 | } | 25 | } |
| 24 | 26 | ||