summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
diff options
context:
space:
mode:
authorGravatar bunnei2021-07-25 11:39:04 -0700
committerGravatar GitHub2021-07-25 11:39:04 -0700
commit98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch)
tree816faa96c2c4d291825063433331a8ea4b3d08f1 /src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
parentMerge pull request #6699 from lat9nq/common-threads (diff)
parentshader: Support out of bound local memory reads and immediate writes (diff)
downloadyuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz
yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip
Merge pull request #6585 from ameerj/hades
Shader Decompiler Rewrite
Diffstat (limited to 'src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp')
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp217
1 file changed, 217 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
new file mode 100644
index 000000000..a982dd8a2
--- /dev/null
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -0,0 +1,217 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/backend/glsl/emit_context.h"
8#include "shader_recompiler/backend/glsl/emit_glsl_instructions.h"
9#include "shader_recompiler/frontend/ir/value.h"
10#include "shader_recompiler/profile.h"
11
12namespace Shader::Backend::GLSL {
13namespace {
// Resolves the GetInBoundsFromOp pseudo-operation attached to a shuffle
// instruction, if present. Forwards the shfl_in_bounds flag (written by the
// shuffle emitters below just before this call) into the pseudo-op's result
// and invalidates the pseudo-op so it is not emitted again.
void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) {
    IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)};
    if (!in_bounds) {
        // No consumer of the in-bounds predicate; nothing to emit.
        return;
    }
    ctx.AddU1("{}=shfl_in_bounds;", *in_bounds);
    in_bounds->Invalidate();
}
22
23std::string ComputeMinThreadId(std::string_view thread_id, std::string_view segmentation_mask) {
24 return fmt::format("({}&{})", thread_id, segmentation_mask);
25}
26
27std::string ComputeMaxThreadId(std::string_view min_thread_id, std::string_view clamp,
28 std::string_view not_seg_mask) {
29 return fmt::format("({})|({}&{})", min_thread_id, clamp, not_seg_mask);
30}
31
32std::string GetMaxThreadId(std::string_view thread_id, std::string_view clamp,
33 std::string_view segmentation_mask) {
34 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
35 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
36 return ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask);
37}
38
39void UseShuffleNv(EmitContext& ctx, IR::Inst& inst, std::string_view shfl_op,
40 std::string_view value, std::string_view index,
41 [[maybe_unused]] std::string_view clamp, std::string_view segmentation_mask) {
42 const auto width{fmt::format("32u>>(bitCount({}&31u))", segmentation_mask)};
43 ctx.AddU32("{}={}({},{},{},shfl_in_bounds);", inst, shfl_op, value, index, width);
44 SetInBoundsFlag(ctx, inst);
45}
46} // Anonymous namespace
47
// Emits the guest lane id: the host invocation id masked to the guest's
// 32-wide warp (the &31u keeps it in range when host subgroups are wider).
void EmitLaneId(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=gl_SubGroupInvocationARB&31u;", inst);
}
51
52void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
53 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
54 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
55 } else {
56 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
57 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
58 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
59 }
60}
61
62void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
63 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
64 ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
65 } else {
66 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
67 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
68 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
69 }
70}
71
72void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
73 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
74 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
75 } else {
76 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubGroupInvocationARB]")};
77 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubGroupInvocationARB]", pred)};
78 const auto value{fmt::format("({}^{})", ballot, active_mask)};
79 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
80 }
81}
82
83void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
84 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
85 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
86 } else {
87 ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubGroupInvocationARB];", inst, pred);
88 }
89}
90
// Emits the low 32 bits of gl_SubGroupEqMaskARB (guest masks are 32-bit).
void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupEqMaskARB.x);", inst);
}
94
// Emits the low 32 bits of gl_SubGroupLtMaskARB (guest masks are 32-bit).
void EmitSubgroupLtMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupLtMaskARB.x);", inst);
}
98
// Emits the low 32 bits of gl_SubGroupLeMaskARB (guest masks are 32-bit).
void EmitSubgroupLeMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupLeMaskARB.x);", inst);
}
102
// Emits the low 32 bits of gl_SubGroupGtMaskARB (guest masks are 32-bit).
void EmitSubgroupGtMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupGtMaskARB.x);", inst);
}
106
// Emits the low 32 bits of gl_SubGroupGeMaskARB (guest masks are 32-bit).
void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) {
    ctx.AddU32("{}=uint(gl_SubGroupGeMaskARB.x);", inst);
}
110
111void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value,
112 std::string_view index, std::string_view clamp,
113 std::string_view segmentation_mask) {
114 if (ctx.profile.support_gl_warp_intrinsics) {
115 UseShuffleNv(ctx, inst, "shuffleNV", value, index, clamp, segmentation_mask);
116 return;
117 }
118 const auto not_seg_mask{fmt::format("(~{})", segmentation_mask)};
119 const auto thread_id{"gl_SubGroupInvocationARB"};
120 const auto min_thread_id{ComputeMinThreadId(thread_id, segmentation_mask)};
121 const auto max_thread_id{ComputeMaxThreadId(min_thread_id, clamp, not_seg_mask)};
122
123 const auto lhs{fmt::format("({}&{})", index, not_seg_mask)};
124 const auto src_thread_id{fmt::format("({})|({})", lhs, min_thread_id)};
125 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
126 SetInBoundsFlag(ctx, inst);
127 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
128}
129
130void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index,
131 std::string_view clamp, std::string_view segmentation_mask) {
132 if (ctx.profile.support_gl_warp_intrinsics) {
133 UseShuffleNv(ctx, inst, "shuffleUpNV", value, index, clamp, segmentation_mask);
134 return;
135 }
136 const auto thread_id{"gl_SubGroupInvocationARB"};
137 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
138 const auto src_thread_id{fmt::format("({}-{})", thread_id, index)};
139 ctx.Add("shfl_in_bounds=int({})>=int({});", src_thread_id, max_thread_id);
140 SetInBoundsFlag(ctx, inst);
141 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
142}
143
144void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value,
145 std::string_view index, std::string_view clamp,
146 std::string_view segmentation_mask) {
147 if (ctx.profile.support_gl_warp_intrinsics) {
148 UseShuffleNv(ctx, inst, "shuffleDownNV", value, index, clamp, segmentation_mask);
149 return;
150 }
151 const auto thread_id{"gl_SubGroupInvocationARB"};
152 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
153 const auto src_thread_id{fmt::format("({}+{})", thread_id, index)};
154 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
155 SetInBoundsFlag(ctx, inst);
156 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
157}
158
159void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value,
160 std::string_view index, std::string_view clamp,
161 std::string_view segmentation_mask) {
162 if (ctx.profile.support_gl_warp_intrinsics) {
163 UseShuffleNv(ctx, inst, "shuffleXorNV", value, index, clamp, segmentation_mask);
164 return;
165 }
166 const auto thread_id{"gl_SubGroupInvocationARB"};
167 const auto max_thread_id{GetMaxThreadId(thread_id, clamp, segmentation_mask)};
168 const auto src_thread_id{fmt::format("({}^{})", thread_id, index)};
169 ctx.Add("shfl_in_bounds=int({})<=int({});", src_thread_id, max_thread_id);
170 SetInBoundsFlag(ctx, inst);
171 ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},{}):{};", inst, value, src_thread_id, value);
172}
173
174void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, std::string_view op_a, std::string_view op_b,
175 std::string_view swizzle) {
176 const auto mask{fmt::format("({}>>((gl_SubGroupInvocationARB&3)<<1))&3", swizzle)};
177 const std::string modifier_a = fmt::format("FSWZ_A[{}]", mask);
178 const std::string modifier_b = fmt::format("FSWZ_B[{}]", mask);
179 ctx.AddF32("{}=({}*{})+({}*{});", inst, op_a, modifier_a, op_b, modifier_b);
180}
181
182void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
183 if (ctx.profile.support_gl_derivative_control) {
184 ctx.AddF32("{}=dFdxFine({});", inst, op_a);
185 } else {
186 LOG_WARNING(Shader_GLSL, "Device does not support dFdxFine, fallback to dFdx");
187 ctx.AddF32("{}=dFdx({});", inst, op_a);
188 }
189}
190
191void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
192 if (ctx.profile.support_gl_derivative_control) {
193 ctx.AddF32("{}=dFdyFine({});", inst, op_a);
194 } else {
195 LOG_WARNING(Shader_GLSL, "Device does not support dFdyFine, fallback to dFdy");
196 ctx.AddF32("{}=dFdy({});", inst, op_a);
197 }
198}
199
200void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
201 if (ctx.profile.support_gl_derivative_control) {
202 ctx.AddF32("{}=dFdxCoarse({});", inst, op_a);
203 } else {
204 LOG_WARNING(Shader_GLSL, "Device does not support dFdxCoarse, fallback to dFdx");
205 ctx.AddF32("{}=dFdx({});", inst, op_a);
206 }
207}
208
209void EmitDPdyCoarse(EmitContext& ctx, IR::Inst& inst, std::string_view op_a) {
210 if (ctx.profile.support_gl_derivative_control) {
211 ctx.AddF32("{}=dFdyCoarse({});", inst, op_a);
212 } else {
213 LOG_WARNING(Shader_GLSL, "Device does not support dFdyCoarse, fallback to dFdy");
214 ctx.AddF32("{}=dFdy({});", inst, op_a);
215 }
216}
217} // namespace Shader::Backend::GLSL