Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp   |   4
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp              | 122
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_context.cpp               |  56
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_context.h                 |   4
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp |  55
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp            |  41
-rw-r--r--  src/shader_recompiler/object_pool.h                                |   6
7 files changed, 229 insertions, 59 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 580063fa9..170db269a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -58,8 +58,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
     const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
     const auto extraction{num_bits == 32 ? cbuf_cast
-                                         : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast,
+                                         : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
                                                         bit_offset, num_bits)};
     if (!component_indexing_bug) {
         const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
         ctx.Add("{}={};", ret, result);
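Note on the GetCbuf hunk above: the only change is removing the stray space in "fmt ::format". The surrounding code builds its GLSL expression in two passes, escaping "{{}}" so a literal "{}" placeholder survives the first pass and is filled in later through fmt::runtime. A minimal standalone sketch of that two-pass pattern (the values "ftou", "vs_cbuf0", "i" and ".x" are made up for illustration, not taken from the patch):

#include <fmt/format.h>

#include <string>

int main() {
    // First pass: "{{}}" escapes to a literal "{}" that is kept for later.
    const std::string cbuf_cast{fmt::format("{}({}[{}]{{}})", "ftou", "vs_cbuf0", "i")};
    // cbuf_cast == "ftou(vs_cbuf0[i]{})"

    // Second pass: fmt::runtime() lets a string built at runtime be used as a
    // format string, filling in the swizzle component.
    const std::string result{fmt::format(fmt::runtime(cbuf_cast), ".x")};
    return result == "ftou(vs_cbuf0[i].x)" ? 0 : 1;
}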
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index a982dd8a2..cd285e2c8 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -11,6 +11,8 @@
 
 namespace Shader::Backend::GLSL {
 namespace {
+constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
+
 void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
     IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
     if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
     ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
     SetInBoundsFlag(ctx, inst);
 }
+
+std::string_view BallotIndex(EmitContext& ctx) {
+    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
+        return ".x";
+    }
+    return "[gl_SubGroupInvocationARB>>5]";
+}
+
+std::string GetMask(EmitContext& ctx, std::string_view mask) {
+    const auto ballot_index{BallotIndex(ctx)};
+    return fmt::format("uint(uvec2({}){})", mask, ballot_index);
+}
 } // Anonymous namespace
 
 void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
+    ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
 }
 
 void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
 }
 
 void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
 }
 
 void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
-    } else {
-        const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
-        const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
-        const auto value{fmt::format("({}^{})", ballot, active_mask)};
-        ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
+        return;
     }
+    const auto ballot_index{BallotIndex(ctx)};
+    const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
+    const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
+    const auto value{fmt::format("({}^{})", ballot, active_mask)};
+    ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
 }
 
 void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
-    if (!ctx.profile.warp_size_potentially_larger_than_guest) {
-        ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
-    } else {
-        ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
-    }
+    const auto ballot_index{BallotIndex(ctx)};
+    ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
 }
 
 void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
 }
 
 void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
 }
 
 void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
 }
 
 void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
 }
 
 void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
-    ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
+    ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
 }
 
 void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
-                      std::string_view index, std::string_view clamp,
-                      std::string_view segmentation_mask) {
+                      std::string_view index, std::string_view clamp, std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
-    const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
+    const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
+    const auto max_thread_id{
+        ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
 
-    const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
+    const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
     const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
128} 146}
129 147
130void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, 148void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
131 std::string_view clamp, std::string_view segmentation_mask) { 149 std::string_view clamp, std::string_view seg_mask) {
132 if (ctx.profile.support_gl_warp_intrinsics) { 150 if (ctx.profile.support_gl_warp_intrinsics) {
133 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); 151 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
134 return; 152 return;
135 } 153 }
136 const auto thread_id{"gl_SubGroupInvocationARB"}; 154 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
137 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; 155 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
138 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; 156 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
157
158 const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
159 const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
139 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); 160 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
140 SetInBoundsFlag(ctx, inst); 161 SetInBoundsFlag(ctx, inst);
141 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); 162 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
142} 163}
143 164
144void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, 165void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
145 std::string_view index, std::string_view clamp, 166 std::string_view index, std::string_view clamp, std::string_view seg_mask) {
146 std::string_view segmentation_mask) {
147 if (ctx.profile.support_gl_warp_intrinsics) { 167 if (ctx.profile.support_gl_warp_intrinsics) {
148 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); 168 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
149 return; 169 return;
150 } 170 }
151 const auto thread_id{"gl_SubGroupInvocationARB"}; 171 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
152 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; 172 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
153 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; 173 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
174
175 const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
176 const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
154 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 177 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
155 SetInBoundsFlag(ctx, inst); 178 SetInBoundsFlag(ctx, inst);
156 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); 179 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
 
 void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
                           std::string_view index, std::string_view clamp,
-                          std::string_view segmentation_mask) {
+                          std::string_view seg_mask) {
     if (ctx.profile.support_gl_warp_intrinsics) {
-        UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
+        UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
         return;
     }
-    const auto thread_id{"gl_SubGroupInvocationARB"};
-    const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
-    const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
+    const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
+    const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
+    const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
+
+    const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
+    const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
     ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
     SetInBoundsFlag(ctx, inst);
     ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
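Note on the emit_glsl_warp.cpp changes above: when the host subgroup may be 64 threads wide while the guest warp is 32, ballotARB() yields a uvec2 and each lane has to read the 32-bit word that contains its own bit; that is what the emitted index [gl_SubGroupInvocationARB>>5] and the &31u lane id encode. A minimal standalone sketch of that arithmetic (hypothetical helper names, not part of the patch):

#include <cstdint>

// For a 64-wide host subgroup, a ballot is two 32-bit words. A host lane's own
// bit lives in word (lane >> 5); its lane id within the 32-wide guest warp is
// (lane & 31).
struct BallotSlot {
    std::uint32_t word;
    std::uint32_t guest_lane;
};

constexpr BallotSlot MapLane(std::uint32_t host_lane) {
    return BallotSlot{host_lane >> 5, host_lane & 31u};
}

static_assert(MapLane(5).word == 0 && MapLane(5).guest_lane == 5);
static_assert(MapLane(37).word == 1 && MapLane(37).guest_lane == 5);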
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2d29d8c14..2885e6799 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,8 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
+constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
+
 enum class Operation {
     Increment,
     Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
         return pointer_type;
     }
 }
+
+size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
+                              size_t start_offset) {
+    for (size_t location = start_offset; location < used_locations.size(); ++location) {
+        if (!used_locations.test(location)) {
+            return location;
+        }
+    }
+    throw RuntimeError("Unable to get an unused location for legacy attribute");
+}
 } // Anonymous namespace
 
 void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         loads[IR::Attribute::TessellationEvaluationPointV]) {
         tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
     }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
     for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
         const AttributeType input_type{runtime_info.generic_input_types[index]};
         if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
         if (input_type == AttributeType::Disabled) {
             continue;
         }
+        used_locations.set(index);
         const Id type{GetAttributeType(*this, input_type)};
         const Id id{DefineInput(*this, type, true)};
         Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
             break;
         }
     }
+    size_t previous_unused_location = 0;
+    if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineInput(*this, F32[4], true)};
+        Decorate(id, spv::Decoration::Location, location);
+        input_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineInput(*this, F32[4], true)};
+            Decorate(id, spv::Decoration::Location, location);
+            input_fixed_fnc_textures[index] = id;
+        }
+    }
     if (stage == Stage::TessellationEval) {
         for (size_t index = 0; index < info.uses_patches.size(); ++index) {
             if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
         viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
                                      spv::BuiltIn::ViewportMaskNV);
     }
+    std::bitset<IR::NUM_GENERICS> used_locations{};
     for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
         if (info.stores.Generic(index)) {
             DefineGenericOutput(*this, index, invocations);
+            used_locations.set(index);
+        }
+    }
+    size_t previous_unused_location = 0;
+    if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
+        const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
+        previous_unused_location = location;
+        used_locations.set(location);
+        const Id id{DefineOutput(*this, F32[4], invocations)};
+        Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
+        output_front_color = id;
+    }
+    for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
+        if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
+            const size_t location =
+                FindNextUnusedLocation(used_locations, previous_unused_location);
+            previous_unused_location = location;
+            used_locations.set(location);
+            const Id id{DefineOutput(*this, F32[4], invocations)};
+            Decorate(id, spv::Decoration::Location, location);
+            output_fixed_fnc_textures[index] = id;
         }
     }
     switch (stage) {
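Note on the DefineInputs/DefineOutputs changes above: generic attributes keep their own location index, then the legacy front color and up to NUM_FIXEDFNCTEXTURE texture coordinate sets each take the next free location found by scanning a bitset. A rough standalone sketch of that allocation order (the constants and the example set of used generic locations are assumptions for illustration):

#include <bitset>
#include <cstddef>
#include <stdexcept>
#include <vector>

constexpr std::size_t NUM_GENERICS = 32;        // mirrors IR::NUM_GENERICS
constexpr std::size_t NUM_FIXEDFNCTEXTURE = 10; // ten fixed-function texcoord sets

std::size_t FindNextUnused(const std::bitset<NUM_GENERICS>& used, std::size_t start) {
    for (std::size_t location = start; location < used.size(); ++location) {
        if (!used.test(location)) {
            return location;
        }
    }
    throw std::runtime_error("no unused location for legacy attribute");
}

// Example: generic locations 0 and 2 are in use, so front color lands on 1 and
// the fixed-function texcoord sets start at 3.
std::vector<std::size_t> AllocateLegacyLocations() {
    std::bitset<NUM_GENERICS> used;
    used.set(0);
    used.set(2);
    std::vector<std::size_t> locations;
    std::size_t previous = 0;
    for (std::size_t i = 0; i < 1 + NUM_FIXEDFNCTEXTURE; ++i) {
        const std::size_t location = FindNextUnused(used, previous);
        previous = location;
        used.set(location);
        locations.push_back(location);
    }
    return locations; // {1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
}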
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e277bc358..847d0c0e6 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -268,10 +268,14 @@ public:
     Id write_global_func_u32x4{};
 
     Id input_position{};
+    Id input_front_color{};
+    std::array<Id, 10> input_fixed_fnc_textures{};
     std::array<Id, 32> input_generics{};
 
     Id output_point_size{};
     Id output_position{};
+    Id output_front_color{};
+    std::array<Id, 10> output_fixed_fnc_textures{};
     std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
 
     Id output_tess_level_outer{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 9e54a17ee..6f60c6574 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
     }
 }
 
+bool IsFixedFncTexture(IR::Attribute attribute) {
+    return attribute >= IR::Attribute::FixedFncTexture0S &&
+           attribute <= IR::Attribute::FixedFncTexture9Q;
+}
+
+u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
+    if (!IsFixedFncTexture(attribute)) {
+        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+    }
+    return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
+}
+
+u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
+    if (!IsFixedFncTexture(attribute)) {
+        throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
+    }
+    return static_cast<u32>(attribute) % 4u;
+}
+
 template <typename... Args>
 Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
     if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
             return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
         }
     }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const u32 element{FixedFncTextureAttributeElement(attr)};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
+                                 element_id);
+    }
     switch (attr) {
     case IR::Attribute::PointSize:
         return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         const Id element_id{ctx.Const(element)};
         return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
     }
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        const u32 element{static_cast<u32>(attr) % 4};
+        const Id element_id{ctx.Const(element)};
+        return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
+    }
     case IR::Attribute::ClipDistance0:
     case IR::Attribute::ClipDistance1:
     case IR::Attribute::ClipDistance2:
@@ -307,6 +341,12 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
         const Id value{ctx.OpLoad(type->id, pointer)};
         return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
     }
+    if (IsFixedFncTexture(attr)) {
+        const u32 index{FixedFncTextureAttributeIndex(attr)};
+        const Id attr_id{ctx.input_fixed_fnc_textures[index]};
+        const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
+        return ctx.OpLoad(ctx.F32[1], attr_ptr);
+    }
     switch (attr) {
     case IR::Attribute::PrimitiveId:
         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -316,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
     case IR::Attribute::PositionW:
         return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
                                                   ctx.Const(element)));
+    case IR::Attribute::ColorFrontDiffuseR:
+    case IR::Attribute::ColorFrontDiffuseG:
+    case IR::Attribute::ColorFrontDiffuseB:
+    case IR::Attribute::ColorFrontDiffuseA: {
+        return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
+                                                   ctx.Const(element)));
+    }
     case IR::Attribute::InstanceId:
         if (ctx.profile.support_vertex_instance_id) {
             return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -430,7 +477,13 @@ void EmitSetSampleMask(EmitContext& ctx, Id value) {
 }
 
 void EmitSetFragDepth(EmitContext& ctx, Id value) {
-    ctx.OpStore(ctx.frag_depth, value);
+    if (!ctx.runtime_info.convert_depth_mode) {
+        ctx.OpStore(ctx.frag_depth, value);
+        return;
+    }
+    const Id unit{ctx.Const(0.5f)};
+    const Id new_depth{ctx.OpFma(ctx.F32[1], value, unit, unit)};
+    ctx.OpStore(ctx.frag_depth, new_depth);
 }
 
 void EmitGetZFlag(EmitContext&) {
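Note on the EmitSetFragDepth change above: when convert_depth_mode is set, the emitter rewrites depth as z * 0.5 + 0.5 via OpFma, remapping a [-1, 1] clip-space depth to the [0, 1] range. A minimal sketch of the same remap in plain C++ (not emitter code):

// Depth remap from [-1, 1] to [0, 1]; mirrors the fused multiply-add emitted
// when convert_depth_mode is enabled.
constexpr float ConvertDepth(float z) {
    return z * 0.5f + 0.5f;
}

static_assert(ConvertDepth(-1.0f) == 0.0f);
static_assert(ConvertDepth(0.0f) == 0.5f);
static_assert(ConvertDepth(1.0f) == 1.0f);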
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 78b1e1ba7..cef52c56e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,8 +7,13 @@
 
 namespace Shader::Backend::SPIRV {
 namespace {
+Id GetThreadId(EmitContext& ctx) {
+    return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
+}
+
 Id WarpExtract(EmitContext& ctx, Id value) {
-    const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
     return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
 }
 
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
     return ctx.OpSelect(ctx.U32[1], in_range,
                         ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
 }
+
+Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
+    const Id thirty_two{ctx.Const(32u)};
+    const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
+    const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+    return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+}
 } // Anonymous namespace
 
 Id EmitLaneId(EmitContext& ctx) {
-    const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id id{GetThreadId(ctx)};
     if (!ctx.profile.warp_size_potentially_larger_than_guest) {
         return id;
     }
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
 Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                     Id segmentation_mask) {
     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        const Id thirty_two{ctx.Const(32u)};
+        const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
+        const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
+        const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
+        index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
+        clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
+    }
     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
 
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
 
 Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                  Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
 
 Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                    Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
 
 Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
                         Id segmentation_mask) {
-    const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
+    const Id thread_id{GetThreadId(ctx)};
+    if (ctx.profile.warp_size_potentially_larger_than_guest) {
+        clamp = GetUpperClamp(ctx, thread_id, clamp);
+    }
     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
     const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
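Note on the emit_spirv_warp.cpp changes above: with a 64-wide host subgroup emulating two 32-wide guest warps, lanes 32..63 form an upper partition, so the shuffle clamp (and, for indexed shuffles, the source index) is shifted by 32 for those lanes before the bounds check. A scalar sketch of the select that GetUpperClamp performs (illustrative C++, not SPIR-V):

#include <cstdint>

// Lanes below 32 keep the guest-relative clamp; lanes in the upper partition
// get the clamp shifted into lanes 32..63.
constexpr std::uint32_t UpperClamp(std::uint32_t thread_id, std::uint32_t clamp) {
    return thread_id >= 32 ? clamp + 32 : clamp;
}

static_assert(UpperClamp(7, 31) == 31);  // lower partition: unchanged
static_assert(UpperClamp(40, 31) == 63); // upper partition: shifted by 32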
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index f3b12d04b..a12ddcc8f 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -11,14 +11,16 @@
 namespace Shader {
 
 template <typename T>
-requires std::is_destructible_v<T> class ObjectPool {
+requires std::is_destructible_v<T>
+class ObjectPool {
 public:
     explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
         node = &chunks.emplace_back(new_chunk_size);
     }
 
     template <typename... Args>
-    requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) {
+    requires std::is_constructible_v<T, Args...>
+    [[nodiscard]] T* Create(Args&&... args) {
         return std::construct_at(Memory(), std::forward<Args>(args)...);
     }
24 26