summaryrefslogtreecommitdiff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp4
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp122
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp56
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h4
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp68
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h9
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp2
-rw-r--r--src/shader_recompiler/host_translate_info.h5
-rw-r--r--src/shader_recompiler/ir_opt/texture_pass.cpp3
-rw-r--r--src/shader_recompiler/object_pool.h6
12 files changed, 369 insertions, 78 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 580063fa9..170db269a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -58,8 +58,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding,
58 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; 58 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
59 const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; 59 const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)};
60 const auto extraction{num_bits == 32 ? cbuf_cast 60 const auto extraction{num_bits == 32 ? cbuf_cast
61 : fmt ::format("bitfieldExtract({},int({}),{})", cbuf_cast, 61 : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast,
62 bit_offset, num_bits)}; 62 bit_offset, num_bits)};
63 if (!component_indexing_bug) { 63 if (!component_indexing_bug) {
64 const auto result{fmt::format(fmt::runtime(extraction), swizzle)}; 64 const auto result{fmt::format(fmt::runtime(extraction), swizzle)};
65 ctx.Add("{}={};", ret, result); 65 ctx.Add("{}={};", ret, result);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index a982dd8a2..cd285e2c8 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -11,6 +11,8 @@
11 11
12namespace Shader::Backend::GLSL { 12namespace Shader::Backend::GLSL {
13namespace { 13namespace {
14constexpr char THREAD_ID[]{"gl_SubGroupInvocationARB"};
15
14void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { 16void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
15 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; 17 IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
16 if (!in_bounds) { 18 if (!in_bounds) {
@@ -43,84 +45,100 @@ void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
43 ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width); 45 ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
44 SetInBoundsFlag(ctx, inst); 46 SetInBoundsFlag(ctx, inst);
45} 47}
48
49std::string_view BallotIndex(EmitContext& ctx) {
50 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
51 return ".x";
52 }
53 return "[gl_SubGroupInvocationARB>>5]";
54}
55
56std::string GetMask(EmitContext& ctx, std::string_view mask) {
57 const auto ballot_index{BallotIndex(ctx)};
58 return fmt::format("uint(uvec2({}){})", mask, ballot_index);
59}
46} // Anonymous namespace 60} // Anonymous namespace
47 61
48void EmitLaneId(EmitContext& ctx, IR::Inst& inst) { 62void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
49 ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst); 63 ctx.AddU32("{}={}&31u;", inst, THREAD_ID);
50} 64}
51 65
52void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 66void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
53 if (!ctx.profile.warp_size_potentially_larger_than_guest) { 67 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
54 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); 68 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
55 } else { 69 return;
56 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
57 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
58 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
59 } 70 }
71 const auto ballot_index{BallotIndex(ctx)};
72 const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
73 const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
74 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
60} 75}
61 76
62void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 77void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
63 if (!ctx.profile.warp_size_potentially_larger_than_guest) { 78 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
64 ctx.AddU1("{}=anyInvocationARB({});", inst, pred); 79 ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
65 } else { 80 return;
66 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
67 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
68 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
69 } 81 }
82 const auto ballot_index{BallotIndex(ctx)};
83 const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
84 const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
85 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
70} 86}
71 87
72void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 88void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) { 89 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); 90 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
75 } else { 91 return;
76 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
77 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
78 const auto value{fmt::format("({}^{})", ballot, active_mask)};
79 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
80 } 92 }
93 const auto ballot_index{BallotIndex(ctx)};
94 const auto active_mask{fmt::format("uvec2(ballotARB(true)){}", ballot_index)};
95 const auto ballot{fmt::format("uvec2(ballotARB({})){}", pred, ballot_index)};
96 const auto value{fmt::format("({}^{})", ballot, active_mask)};
97 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
81} 98}
82 99
83void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 100void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) { 101 const auto ballot_index{BallotIndex(ctx)};
85 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); 102 ctx.AddU32("{}=uvec2(ballotARB({})){};", inst, pred, ballot_index);
86 } else {
87 ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
88 }
89} 103}
90 104
91void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { 105void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
92 ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst); 106 ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupEqMaskARB"));
93} 107}
94 108
95void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) { 109void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
96 ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst); 110 ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLtMaskARB"));
97} 111}
98 112
99void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) { 113void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
100 ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst); 114 ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupLeMaskARB"));
101} 115}
102 116
103void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) { 117void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
104 ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst); 118 ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGtMaskARB"));
105} 119}
106 120
107void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { 121void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
108 ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst); 122 ctx.AddU32("{}={};", inst, GetMask(ctx, "gl_SubGroupGeMaskARB"));
109} 123}
110 124
111void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, 125void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
112 std::string_view index, std::string_view clamp, 126 std::string_view index, std::string_view clamp, std::string_view seg_mask) {
113 std::string_view segmentation_mask) {
114 if (ctx.profile.support_gl_warp_intrinsics) { 127 if (ctx.profile.support_gl_warp_intrinsics) {
115 UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask); 128 UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, seg_mask);
116 return; 129 return;
117 } 130 }
118 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)}; 131 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
119 const auto thread_id{"gl_SubGroupInvocationARB"}; 132 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
120 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)}; 133 const auto upper_index{fmt::format("{}?{}+32:{}", is_upper_partition, index, index)};
121 const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)}; 134 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
135
136 const auto not_seg_mask{fmt::format("(~{})", seg_mask)};
137 const auto min_thread_id{ComputeMinThreadId(THREAD_ID, seg_mask)};
138 const auto max_thread_id{
139 ComputeMaxThreadId(min_thread_id, big_warp ? upper_clamp : clamp, not_seg_mask)};
122 140
123 const auto lhs{fmt::format("({}&{})", index, not_seg_mask)}; 141 const auto lhs{fmt::format("({}&{})", big_warp ? upper_index : index, not_seg_mask)};
124 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)}; 142 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
125 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 143 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
126 SetInBoundsFlag(ctx, inst); 144 SetInBoundsFlag(ctx, inst);
@@ -128,29 +146,34 @@ void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
128} 146}
129 147
130void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, 148void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
131 std::string_view clamp, std::string_view segmentation_mask) { 149 std::string_view clamp, std::string_view seg_mask) {
132 if (ctx.profile.support_gl_warp_intrinsics) { 150 if (ctx.profile.support_gl_warp_intrinsics) {
133 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask); 151 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, seg_mask);
134 return; 152 return;
135 } 153 }
136 const auto thread_id{"gl_SubGroupInvocationARB"}; 154 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
137 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; 155 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
138 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)}; 156 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
157
158 const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
159 const auto src_thread_id{fmt::format("({}-{})", THREAD_ID, index)};
139 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id); 160 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
140 SetInBoundsFlag(ctx, inst); 161 SetInBoundsFlag(ctx, inst);
141 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); 162 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
142} 163}
143 164
144void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, 165void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
145 std::string_view index, std::string_view clamp, 166 std::string_view index, std::string_view clamp, std::string_view seg_mask) {
146 std::string_view segmentation_mask) {
147 if (ctx.profile.support_gl_warp_intrinsics) { 167 if (ctx.profile.support_gl_warp_intrinsics) {
148 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask); 168 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, seg_mask);
149 return; 169 return;
150 } 170 }
151 const auto thread_id{"gl_SubGroupInvocationARB"}; 171 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
152 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; 172 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
153 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)}; 173 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
174
175 const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
176 const auto src_thread_id{fmt::format("({}+{})", THREAD_ID, index)};
154 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 177 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
155 SetInBoundsFlag(ctx, inst); 178 SetInBoundsFlag(ctx, inst);
156 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); 179 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
@@ -158,14 +181,17 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
158 181
159void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, 182void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
160 std::string_view index, std::string_view clamp, 183 std::string_view index, std::string_view clamp,
161 std::string_view segmentation_mask) { 184 std::string_view seg_mask) {
162 if (ctx.profile.support_gl_warp_intrinsics) { 185 if (ctx.profile.support_gl_warp_intrinsics) {
163 UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask); 186 UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, seg_mask);
164 return; 187 return;
165 } 188 }
166 const auto thread_id{"gl_SubGroupInvocationARB"}; 189 const bool big_warp{ctx.profile.warp_size_potentially_larger_than_guest};
167 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)}; 190 const auto is_upper_partition{"int(gl_SubGroupInvocationARB)>=32"};
168 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)}; 191 const auto upper_clamp{fmt::format("{}?{}+32:{}", is_upper_partition, clamp, clamp)};
192
193 const auto max_thread_id{GetMaxThreadId(THREAD_ID, big_warp ? upper_clamp : clamp, seg_mask)};
194 const auto src_thread_id{fmt::format("({}^{})", THREAD_ID, index)};
169 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id); 195 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
170 SetInBoundsFlag(ctx, inst); 196 SetInBoundsFlag(ctx, inst);
171 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value); 197 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2d29d8c14..2885e6799 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,8 @@
15 15
16namespace Shader::Backend::SPIRV { 16namespace Shader::Backend::SPIRV {
17namespace { 17namespace {
18constexpr size_t NUM_FIXEDFNCTEXTURE = 10;
19
18enum class Operation { 20enum class Operation {
19 Increment, 21 Increment,
20 Decrement, 22 Decrement,
@@ -427,6 +429,16 @@ Id DescType(EmitContext& ctx, Id sampled_type, Id pointer_type, u32 count) {
427 return pointer_type; 429 return pointer_type;
428 } 430 }
429} 431}
432
433size_t FindNextUnusedLocation(const std::bitset<IR::NUM_GENERICS>& used_locations,
434 size_t start_offset) {
435 for (size_t location = start_offset; location < used_locations.size(); ++location) {
436 if (!used_locations.test(location)) {
437 return location;
438 }
439 }
440 throw RuntimeError("Unable to get an unused location for legacy attribute");
441}
430} // Anonymous namespace 442} // Anonymous namespace
431 443
432void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) { 444void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
@@ -1227,6 +1239,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1227 loads[IR::Attribute::TessellationEvaluationPointV]) { 1239 loads[IR::Attribute::TessellationEvaluationPointV]) {
1228 tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord); 1240 tess_coord = DefineInput(*this, F32[3], false, spv::BuiltIn::TessCoord);
1229 } 1241 }
1242 std::bitset<IR::NUM_GENERICS> used_locations{};
1230 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { 1243 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1231 const AttributeType input_type{runtime_info.generic_input_types[index]}; 1244 const AttributeType input_type{runtime_info.generic_input_types[index]};
1232 if (!runtime_info.previous_stage_stores.Generic(index)) { 1245 if (!runtime_info.previous_stage_stores.Generic(index)) {
@@ -1238,6 +1251,7 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1238 if (input_type == AttributeType::Disabled) { 1251 if (input_type == AttributeType::Disabled) {
1239 continue; 1252 continue;
1240 } 1253 }
1254 used_locations.set(index);
1241 const Id type{GetAttributeType(*this, input_type)}; 1255 const Id type{GetAttributeType(*this, input_type)};
1242 const Id id{DefineInput(*this, type, true)}; 1256 const Id id{DefineInput(*this, type, true)};
1243 Decorate(id, spv::Decoration::Location, static_cast<u32>(index)); 1257 Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
@@ -1263,6 +1277,26 @@ void EmitContext::DefineInputs(const IR::Program& program) {
1263 break; 1277 break;
1264 } 1278 }
1265 } 1279 }
1280 size_t previous_unused_location = 0;
1281 if (loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
1282 const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
1283 previous_unused_location = location;
1284 used_locations.set(location);
1285 const Id id{DefineInput(*this, F32[4], true)};
1286 Decorate(id, spv::Decoration::Location, location);
1287 input_front_color = id;
1288 }
1289 for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
1290 if (loads.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
1291 const size_t location =
1292 FindNextUnusedLocation(used_locations, previous_unused_location);
1293 previous_unused_location = location;
1294 used_locations.set(location);
1295 const Id id{DefineInput(*this, F32[4], true)};
1296 Decorate(id, spv::Decoration::Location, location);
1297 input_fixed_fnc_textures[index] = id;
1298 }
1299 }
1266 if (stage == Stage::TessellationEval) { 1300 if (stage == Stage::TessellationEval) {
1267 for (size_t index = 0; index < info.uses_patches.size(); ++index) { 1301 for (size_t index = 0; index < info.uses_patches.size(); ++index) {
1268 if (!info.uses_patches[index]) { 1302 if (!info.uses_patches[index]) {
@@ -1313,9 +1347,31 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
1313 viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt, 1347 viewport_mask = DefineOutput(*this, TypeArray(U32[1], Const(1u)), std::nullopt,
1314 spv::BuiltIn::ViewportMaskNV); 1348 spv::BuiltIn::ViewportMaskNV);
1315 } 1349 }
1350 std::bitset<IR::NUM_GENERICS> used_locations{};
1316 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { 1351 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
1317 if (info.stores.Generic(index)) { 1352 if (info.stores.Generic(index)) {
1318 DefineGenericOutput(*this, index, invocations); 1353 DefineGenericOutput(*this, index, invocations);
1354 used_locations.set(index);
1355 }
1356 }
1357 size_t previous_unused_location = 0;
1358 if (info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) {
1359 const size_t location = FindNextUnusedLocation(used_locations, previous_unused_location);
1360 previous_unused_location = location;
1361 used_locations.set(location);
1362 const Id id{DefineOutput(*this, F32[4], invocations)};
1363 Decorate(id, spv::Decoration::Location, static_cast<u32>(location));
1364 output_front_color = id;
1365 }
1366 for (size_t index = 0; index < NUM_FIXEDFNCTEXTURE; ++index) {
1367 if (info.stores.AnyComponent(IR::Attribute::FixedFncTexture0S + index * 4)) {
1368 const size_t location =
1369 FindNextUnusedLocation(used_locations, previous_unused_location);
1370 previous_unused_location = location;
1371 used_locations.set(location);
1372 const Id id{DefineOutput(*this, F32[4], invocations)};
1373 Decorate(id, spv::Decoration::Location, location);
1374 output_fixed_fnc_textures[index] = id;
1319 } 1375 }
1320 } 1376 }
1321 switch (stage) { 1377 switch (stage) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e277bc358..847d0c0e6 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -268,10 +268,14 @@ public:
268 Id write_global_func_u32x4{}; 268 Id write_global_func_u32x4{};
269 269
270 Id input_position{}; 270 Id input_position{};
271 Id input_front_color{};
272 std::array<Id, 10> input_fixed_fnc_textures{};
271 std::array<Id, 32> input_generics{}; 273 std::array<Id, 32> input_generics{};
272 274
273 Id output_point_size{}; 275 Id output_point_size{};
274 Id output_position{}; 276 Id output_position{};
277 Id output_front_color{};
278 std::array<Id, 10> output_fixed_fnc_textures{};
275 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{}; 279 std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
276 280
277 Id output_tess_level_outer{}; 281 Id output_tess_level_outer{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index fb8c02a77..6f60c6574 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -43,6 +43,25 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
43 } 43 }
44} 44}
45 45
46bool IsFixedFncTexture(IR::Attribute attribute) {
47 return attribute >= IR::Attribute::FixedFncTexture0S &&
48 attribute <= IR::Attribute::FixedFncTexture9Q;
49}
50
51u32 FixedFncTextureAttributeIndex(IR::Attribute attribute) {
52 if (!IsFixedFncTexture(attribute)) {
53 throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
54 }
55 return (static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::FixedFncTexture0S)) / 4u;
56}
57
58u32 FixedFncTextureAttributeElement(IR::Attribute attribute) {
59 if (!IsFixedFncTexture(attribute)) {
60 throw InvalidArgument("Attribute {} is not a FixedFncTexture", attribute);
61 }
62 return static_cast<u32>(attribute) % 4u;
63}
64
46template <typename... Args> 65template <typename... Args>
47Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) { 66Id OutputAccessChain(EmitContext& ctx, Id result_type, Id base, Args&&... args) {
48 if (ctx.stage == Stage::TessellationControl) { 67 if (ctx.stage == Stage::TessellationControl) {
@@ -74,6 +93,13 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
74 return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id); 93 return OutputAccessChain(ctx, ctx.output_f32, info.id, index_id);
75 } 94 }
76 } 95 }
96 if (IsFixedFncTexture(attr)) {
97 const u32 index{FixedFncTextureAttributeIndex(attr)};
98 const u32 element{FixedFncTextureAttributeElement(attr)};
99 const Id element_id{ctx.Const(element)};
100 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_fixed_fnc_textures[index],
101 element_id);
102 }
77 switch (attr) { 103 switch (attr) {
78 case IR::Attribute::PointSize: 104 case IR::Attribute::PointSize:
79 return ctx.output_point_size; 105 return ctx.output_point_size;
@@ -85,6 +111,14 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
85 const Id element_id{ctx.Const(element)}; 111 const Id element_id{ctx.Const(element)};
86 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id); 112 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_position, element_id);
87 } 113 }
114 case IR::Attribute::ColorFrontDiffuseR:
115 case IR::Attribute::ColorFrontDiffuseG:
116 case IR::Attribute::ColorFrontDiffuseB:
117 case IR::Attribute::ColorFrontDiffuseA: {
118 const u32 element{static_cast<u32>(attr) % 4};
119 const Id element_id{ctx.Const(element)};
120 return OutputAccessChain(ctx, ctx.output_f32, ctx.output_front_color, element_id);
121 }
88 case IR::Attribute::ClipDistance0: 122 case IR::Attribute::ClipDistance0:
89 case IR::Attribute::ClipDistance1: 123 case IR::Attribute::ClipDistance1:
90 case IR::Attribute::ClipDistance2: 124 case IR::Attribute::ClipDistance2:
@@ -298,19 +332,21 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
298 if (IR::IsGeneric(attr)) { 332 if (IR::IsGeneric(attr)) {
299 const u32 index{IR::GenericAttributeIndex(attr)}; 333 const u32 index{IR::GenericAttributeIndex(attr)};
300 const std::optional<AttrInfo> type{AttrTypes(ctx, index)}; 334 const std::optional<AttrInfo> type{AttrTypes(ctx, index)};
301 if (!type) { 335 if (!type || !ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
302 // Attribute is disabled 336 // Attribute is disabled or varying component is not written
303 return ctx.Const(element == 3 ? 1.0f : 0.0f); 337 return ctx.Const(element == 3 ? 1.0f : 0.0f);
304 } 338 }
305 if (!ctx.runtime_info.previous_stage_stores.Generic(index, element)) {
306 // Varying component is not written
307 return ctx.Const(type && element == 3 ? 1.0f : 0.0f);
308 }
309 const Id generic_id{ctx.input_generics.at(index)}; 339 const Id generic_id{ctx.input_generics.at(index)};
310 const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))}; 340 const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, ctx.Const(element))};
311 const Id value{ctx.OpLoad(type->id, pointer)}; 341 const Id value{ctx.OpLoad(type->id, pointer)};
312 return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; 342 return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value;
313 } 343 }
344 if (IsFixedFncTexture(attr)) {
345 const u32 index{FixedFncTextureAttributeIndex(attr)};
346 const Id attr_id{ctx.input_fixed_fnc_textures[index]};
347 const Id attr_ptr{AttrPointer(ctx, ctx.input_f32, vertex, attr_id, ctx.Const(element))};
348 return ctx.OpLoad(ctx.F32[1], attr_ptr);
349 }
314 switch (attr) { 350 switch (attr) {
315 case IR::Attribute::PrimitiveId: 351 case IR::Attribute::PrimitiveId:
316 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id)); 352 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.primitive_id));
@@ -320,6 +356,13 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
320 case IR::Attribute::PositionW: 356 case IR::Attribute::PositionW:
321 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, 357 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position,
322 ctx.Const(element))); 358 ctx.Const(element)));
359 case IR::Attribute::ColorFrontDiffuseR:
360 case IR::Attribute::ColorFrontDiffuseG:
361 case IR::Attribute::ColorFrontDiffuseB:
362 case IR::Attribute::ColorFrontDiffuseA: {
363 return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_front_color,
364 ctx.Const(element)));
365 }
323 case IR::Attribute::InstanceId: 366 case IR::Attribute::InstanceId:
324 if (ctx.profile.support_vertex_instance_id) { 367 if (ctx.profile.support_vertex_instance_id) {
325 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id)); 368 return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id));
@@ -337,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
337 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); 380 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
338 } 381 }
339 case IR::Attribute::FrontFace: 382 case IR::Attribute::FrontFace:
340 return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), 383 return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
341 ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); 384 ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
385 ctx.f32_zero_value);
342 case IR::Attribute::PointSpriteS: 386 case IR::Attribute::PointSpriteS:
343 return ctx.OpLoad(ctx.F32[1], 387 return ctx.OpLoad(ctx.F32[1],
344 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); 388 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
@@ -433,7 +477,13 @@ void EmitSetSampleMask(EmitContext& ctx, Id value) {
433} 477}
434 478
435void EmitSetFragDepth(EmitContext& ctx, Id value) { 479void EmitSetFragDepth(EmitContext& ctx, Id value) {
436 ctx.OpStore(ctx.frag_depth, value); 480 if (!ctx.runtime_info.convert_depth_mode) {
481 ctx.OpStore(ctx.frag_depth, value);
482 return;
483 }
484 const Id unit{ctx.Const(0.5f)};
485 const Id new_depth{ctx.OpFma(ctx.F32[1], value, unit, unit)};
486 ctx.OpStore(ctx.frag_depth, new_depth);
437} 487}
438 488
439void EmitGetZFlag(EmitContext&) { 489void EmitGetZFlag(EmitContext&) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index 78b1e1ba7..cef52c56e 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
@@ -7,8 +7,13 @@
7 7
8namespace Shader::Backend::SPIRV { 8namespace Shader::Backend::SPIRV {
9namespace { 9namespace {
10Id GetThreadId(EmitContext& ctx) {
11 return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
12}
13
10Id WarpExtract(EmitContext& ctx, Id value) { 14Id WarpExtract(EmitContext& ctx, Id value) {
11 const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 15 const Id thread_id{GetThreadId(ctx)};
16 const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))};
12 return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); 17 return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index);
13} 18}
14 19
@@ -48,10 +53,17 @@ Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) {
48 return ctx.OpSelect(ctx.U32[1], in_range, 53 return ctx.OpSelect(ctx.U32[1], in_range,
49 ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); 54 ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value);
50} 55}
56
57Id GetUpperClamp(EmitContext& ctx, Id invocation_id, Id clamp) {
58 const Id thirty_two{ctx.Const(32u)};
59 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, invocation_id, thirty_two)};
60 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
61 return ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
62}
51} // Anonymous namespace 63} // Anonymous namespace
52 64
53Id EmitLaneId(EmitContext& ctx) { 65Id EmitLaneId(EmitContext& ctx) {
54 const Id id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 66 const Id id{GetThreadId(ctx)};
55 if (!ctx.profile.warp_size_potentially_larger_than_guest) { 67 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
56 return id; 68 return id;
57 } 69 }
@@ -123,7 +135,15 @@ Id EmitSubgroupGeMask(EmitContext& ctx) {
123Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 135Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
124 Id segmentation_mask) { 136 Id segmentation_mask) {
125 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; 137 const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)};
126 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 138 const Id thread_id{GetThreadId(ctx)};
139 if (ctx.profile.warp_size_potentially_larger_than_guest) {
140 const Id thirty_two{ctx.Const(32u)};
141 const Id is_upper_partition{ctx.OpSGreaterThanEqual(ctx.U1, thread_id, thirty_two)};
142 const Id upper_index{ctx.OpIAdd(ctx.U32[1], thirty_two, index)};
143 const Id upper_clamp{ctx.OpIAdd(ctx.U32[1], thirty_two, clamp)};
144 index = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_index, index);
145 clamp = ctx.OpSelect(ctx.U32[1], is_upper_partition, upper_clamp, clamp);
146 }
127 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; 147 const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)};
128 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; 148 const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)};
129 149
@@ -137,7 +157,10 @@ Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id cla
137 157
138Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 158Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
139 Id segmentation_mask) { 159 Id segmentation_mask) {
140 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 160 const Id thread_id{GetThreadId(ctx)};
161 if (ctx.profile.warp_size_potentially_larger_than_guest) {
162 clamp = GetUpperClamp(ctx, thread_id, clamp);
163 }
141 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 164 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
142 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; 165 const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)};
143 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 166 const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -148,7 +171,10 @@ Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
148 171
149Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 172Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
150 Id segmentation_mask) { 173 Id segmentation_mask) {
151 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 174 const Id thread_id{GetThreadId(ctx)};
175 if (ctx.profile.warp_size_potentially_larger_than_guest) {
176 clamp = GetUpperClamp(ctx, thread_id, clamp);
177 }
152 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 178 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
153 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; 179 const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)};
154 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 180 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
@@ -159,7 +185,10 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
159 185
160Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, 186Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
161 Id segmentation_mask) { 187 Id segmentation_mask) {
162 const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; 188 const Id thread_id{GetThreadId(ctx)};
189 if (ctx.profile.warp_size_potentially_larger_than_guest) {
190 clamp = GetUpperClamp(ctx, thread_id, clamp);
191 }
163 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; 192 const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)};
164 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; 193 const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)};
165 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; 194 const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c..69eeaa3e6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
20#include "shader_recompiler/frontend/maxwell/decode.h" 20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" 21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h" 22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h" 24#include "shader_recompiler/object_pool.h"
24 25
25namespace Shader::Maxwell { 26namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
652public: 653public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, 654 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, 655 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_) 656 IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, 657 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} { 658 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr); 659 Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
660 IR::Block& first_block{*syntax_list.front().data.block}; 661 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin()); 662 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue(); 663 ir.Prologue();
664 if (uses_demote_to_helper && host_info.needs_demote_reorder) {
665 DemoteCombinationPass();
666 }
663 } 667 }
664 668
665private: 669private:
@@ -809,7 +813,14 @@ private:
809 } 813 }
810 case StatementType::Return: { 814 case StatementType::Return: {
811 ensure_block(); 815 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue(); 816 IR::Block* return_block{block_pool.Create(inst_pool)};
817 IR::IREmitter{*return_block}.Epilogue();
818 current_block->AddBranch(return_block);
819
820 auto& merge{syntax_list.emplace_back()};
821 merge.type = IR::AbstractSyntaxNode::Type::Block;
822 merge.data.block = return_block;
823
813 current_block = nullptr; 824 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; 825 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break; 826 break;
@@ -824,6 +835,7 @@ private:
824 auto& merge{syntax_list.emplace_back()}; 835 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block; 836 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block; 837 merge.data.block = demote_block;
838 uses_demote_to_helper = true;
827 break; 839 break;
828 } 840 }
829 case StatementType::Unreachable: { 841 case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
855 return block_pool.Create(inst_pool); 867 return block_pool.Create(inst_pool);
856 } 868 }
857 869
870 void DemoteCombinationPass() {
871 using Type = IR::AbstractSyntaxNode::Type;
872 std::vector<IR::Block*> demote_blocks;
873 std::vector<IR::U1> demote_conds;
874 u32 num_epilogues{};
875 u32 branch_depth{};
876 for (const IR::AbstractSyntaxNode& node : syntax_list) {
877 if (node.type == Type::If) {
878 ++branch_depth;
879 }
880 if (node.type == Type::EndIf) {
881 --branch_depth;
882 }
883 if (node.type != Type::Block) {
884 continue;
885 }
886 if (branch_depth > 1) {
887 // Skip reordering nested demote branches.
888 continue;
889 }
890 for (const IR::Inst& inst : node.data.block->Instructions()) {
891 const IR::Opcode op{inst.GetOpcode()};
892 if (op == IR::Opcode::DemoteToHelperInvocation) {
893 demote_blocks.push_back(node.data.block);
894 break;
895 }
896 if (op == IR::Opcode::Epilogue) {
897 ++num_epilogues;
898 }
899 }
900 }
901 if (demote_blocks.size() == 0) {
902 return;
903 }
904 if (num_epilogues > 1) {
905 LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
906 return;
907 }
908 s64 last_iterator_offset{};
909 auto& asl{syntax_list};
910 for (const IR::Block* demote_block : demote_blocks) {
911 const auto start_it{asl.begin() + last_iterator_offset};
912 auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
913 return asn.type == Type::If && asn.data.if_node.body == demote_block;
914 })};
915 if (asl_it == asl.end()) {
916 // Demote without a conditional branch.
917 // No need to proceed since all fragment instances will be demoted regardless.
918 return;
919 }
920 const IR::Block* const end_if = asl_it->data.if_node.merge;
921 demote_conds.push_back(asl_it->data.if_node.cond);
922 last_iterator_offset = std::distance(asl.begin(), asl_it);
923
924 asl_it = asl.erase(asl_it);
925 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
926 return asn.type == Type::Block && asn.data.block == demote_block;
927 });
928
929 asl_it = asl.erase(asl_it);
930 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
931 return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
932 });
933 asl_it = asl.erase(asl_it);
934 }
935 const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
936 if (asn.type != Type::Block) {
937 return false;
938 }
939 for (const auto& inst : asn.data.block->Instructions()) {
940 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
941 return true;
942 }
943 }
944 return false;
945 }};
946 const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
947 const auto return_block_it{(reverse_it + 1).base()};
948
949 IR::IREmitter ir{*(return_block_it - 1)->data.block};
950 IR::U1 cond(IR::Value(false));
951 for (const auto& demote_cond : demote_conds) {
952 cond = ir.LogicalOr(cond, demote_cond);
953 }
954 cond.Inst()->DestructiveAddUsage(1);
955
956 IR::AbstractSyntaxNode demote_if_node{};
957 demote_if_node.type = Type::If;
958 demote_if_node.data.if_node.cond = cond;
959 demote_if_node.data.if_node.body = demote_blocks[0];
960 demote_if_node.data.if_node.merge = return_block_it->data.block;
961
962 IR::AbstractSyntaxNode demote_node{};
963 demote_node.type = Type::Block;
964 demote_node.data.block = demote_blocks[0];
965
966 IR::AbstractSyntaxNode demote_endif_node{};
967 demote_endif_node.type = Type::EndIf;
968 demote_endif_node.data.end_if.merge = return_block_it->data.block;
969
970 asl.insert(return_block_it, demote_endif_node);
971 asl.insert(return_block_it, demote_node);
972 asl.insert(return_block_it, demote_if_node);
973 }
974
858 ObjectPool<Statement>& stmt_pool; 975 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool; 976 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool; 977 ObjectPool<IR::Block>& block_pool;
861 Environment& env; 978 Environment& env;
862 IR::AbstractSyntaxList& syntax_list; 979 IR::AbstractSyntaxList& syntax_list;
980 bool uses_demote_to_helper{};
863 981
864// TODO: C++20 Remove this when all compilers support constexpr std::vector 982// TODO: C++20 Remove this when all compilers support constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907 983#if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
871} // Anonymous namespace 989} // Anonymous namespace
872 990
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 991IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) { 992 Environment& env, Flow::CFG& cfg,
993 const HostTranslateInfo& host_info) {
875 ObjectPool<Statement> stmt_pool{64}; 994 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool}; 995 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()}; 996 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list; 997 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; 998 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
880 return syntax_list; 999 return syntax_list;
881} 1000}
882 1001
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b083649..e38158da3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
11#include "shader_recompiler/frontend/maxwell/control_flow.h" 11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h" 12#include "shader_recompiler/object_pool.h"
13 13
14namespace Shader::Maxwell { 14namespace Shader {
15struct HostTranslateInfo;
16namespace Maxwell {
15 17
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, 18[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env, 19 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg); 20 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19 21
20} // namespace Shader::Maxwell 22} // namespace Maxwell
23} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459c..012d55357 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { 131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program; 132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); 133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
134 program.blocks = GenerateBlocks(program.syntax_list); 134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front()); 135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage(); 136 program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219..96468b2e7 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
11 11
12/// Misc information about the host 12/// Misc information about the host
13struct HostTranslateInfo { 13struct HostTranslateInfo {
14 bool support_float16{}; ///< True when the device supports 16-bit floats 14 bool support_float16{}; ///< True when the device supports 16-bit floats
15 bool support_int64{}; ///< True when the device supports 64-bit integers 15 bool support_int64{}; ///< True when the device supports 64-bit integers
16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16}; 17};
17 18
18} // namespace Shader 19} // namespace Shader
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 44ad10d43..225c238fb 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -492,7 +492,8 @@ void TexturePass(Environment& env, IR::Program& program) {
492 const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)}; 492 const auto insert_point{IR::Block::InstructionList::s_iterator_to(*inst)};
493 IR::IREmitter ir{*texture_inst.block, insert_point}; 493 IR::IREmitter ir{*texture_inst.block, insert_point};
494 const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))}; 494 const IR::U32 shift{ir.Imm32(std::countr_zero(DESCRIPTOR_SIZE))};
495 inst->SetArg(0, ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift)); 495 inst->SetArg(0, ir.SMin(ir.ShiftRightArithmetic(cbuf.dynamic_offset, shift),
496 ir.Imm32(DESCRIPTOR_SIZE - 1)));
496 } else { 497 } else {
497 inst->SetArg(0, IR::Value{}); 498 inst->SetArg(0, IR::Value{});
498 } 499 }
diff --git a/src/shader_recompiler/object_pool.h b/src/shader_recompiler/object_pool.h
index f3b12d04b..a12ddcc8f 100644
--- a/src/shader_recompiler/object_pool.h
+++ b/src/shader_recompiler/object_pool.h
@@ -11,14 +11,16 @@
11namespace Shader { 11namespace Shader {
12 12
13template <typename T> 13template <typename T>
14requires std::is_destructible_v<T> class ObjectPool { 14requires std::is_destructible_v<T>
15class ObjectPool {
15public: 16public:
16 explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} { 17 explicit ObjectPool(size_t chunk_size = 8192) : new_chunk_size{chunk_size} {
17 node = &chunks.emplace_back(new_chunk_size); 18 node = &chunks.emplace_back(new_chunk_size);
18 } 19 }
19 20
20 template <typename... Args> 21 template <typename... Args>
21 requires std::is_constructible_v<T, Args...>[[nodiscard]] T* Create(Args&&... args) { 22 requires std::is_constructible_v<T, Args...>
23 [[nodiscard]] T* Create(Args&&... args) {
22 return std::construct_at(Memory(), std::forward<Args>(args)...); 24 return std::construct_at(Memory(), std::forward<Args>(args)...);
23 } 25 }
24 26