Diffstat
-rw-r--r--  src/shader_recompiler/CMakeLists.txt                          1
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_context.cpp         13
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm.h              5
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp   351
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp   380
-rw-r--r--  src/shader_recompiler/profile.h                               3
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp           26
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h              6
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.cpp       42
-rw-r--r--  src/video_core/renderer_opengl/gl_compute_pipeline.h         12
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp                 18
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h                    6
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.cpp      19
-rw-r--r--  src/video_core/renderer_opengl/gl_graphics_pipeline.h        12
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp             13
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h                3
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp           30
17 files changed, 503 insertions, 437 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index becdb7d54..d6d8e5f59 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC
     backend/glasm/emit_context.h
     backend/glasm/emit_glasm.cpp
     backend/glasm/emit_glasm.h
-    backend/glasm/emit_glasm_atomic.cpp
     backend/glasm/emit_glasm_barriers.cpp
     backend/glasm/emit_glasm_bitwise_conversion.cpp
     backend/glasm/emit_glasm_composite.cpp
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp
index b5b0e2204..e18526816 100644
--- a/src/shader_recompiler/backend/glasm/emit_context.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_context.cpp
@@ -7,6 +7,7 @@
 #include "shader_recompiler/backend/bindings.h"
 #include "shader_recompiler/backend/glasm/emit_context.h"
 #include "shader_recompiler/frontend/ir/program.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLASM {
 namespace {
@@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
40 Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); 41 Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index);
41 ++cbuf_index; 42 ++cbuf_index;
42 } 43 }
44 u32 ssbo_index{};
43 for (const auto& desc : info.storage_buffers_descriptors) { 45 for (const auto& desc : info.storage_buffers_descriptors) {
44 if (desc.count != 1) { 46 if (desc.count != 1) {
45 throw NotImplementedException("Storage buffer descriptor array"); 47 throw NotImplementedException("Storage buffer descriptor array");
46 } 48 }
49 if (runtime_info.glasm_use_storage_buffers) {
50 Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer);
51 ++bindings.storage_buffer;
52 ++ssbo_index;
53 }
47 } 54 }
48 if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { 55 if (!runtime_info.glasm_use_storage_buffers) {
49 Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); 56 if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) {
57 Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1);
58 }
50 } 59 }
51 stage = program.stage; 60 stage = program.stage;
52 switch (program.stage) { 61 switch (program.stage) {
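For illustration: with two storage-buffer descriptors and bindings.storage_buffer starting at 0, the storage-buffer path above declares the buffers directly (a sketch derived from the Add() format strings in this hunk; indices are illustrative):

    STORAGE ssbo0[]={program.storage[0]};
    STORAGE ssbo1[]={program.storage[1]};

The bindless fallback instead keeps aliasing the SSBO addresses through a constant-buffer window:

    PARAM c[2]={program.local[0..1]};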
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h
index 3d02d873e..3df32a4a6 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm.h
+++ b/src/shader_recompiler/backend/glasm/emit_glasm.h
@@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM {
 [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
                                     IR::Program& program, Bindings& bindings);
 
-[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) {
+[[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info,
+                                           IR::Program& program) {
     Bindings binding;
-    return EmitGLASM(profile, {}, program, binding);
+    return EmitGLASM(profile, runtime_info, program, binding);
 }
 
 } // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
deleted file mode 100644
index e72b252a3..000000000
--- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp
+++ /dev/null
@@ -1,351 +0,0 @@
-// Copyright 2021 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include "shader_recompiler/backend/glasm/emit_context.h"
-#include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
-#include "shader_recompiler/frontend/ir/value.h"
-
-namespace Shader::Backend::GLASM {
-namespace {
-void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
-               std::string_view then_expr, std::string_view else_expr = {}) {
-    // Operate on bindless SSBO, call the expression with bounds checking
-    // address = c[binding].xy
-    // length = c[binding].z
-    const u32 sb_binding{binding.U32()};
-    ctx.Add("PK64.U DC,c[{}];"           // pointer = address
-            "CVT.U64.U32 DC.z,{};"       // offset = uint64_t(offset)
-            "ADD.U64 DC.x,DC.x,DC.z;"    // pointer += offset
-            "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length
-            sb_binding, offset, offset, sb_binding);
-    if (else_expr.empty()) {
-        ctx.Add("IF NE.x;{}ENDIF;", then_expr);
-    } else {
-        ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr);
-    }
-}
-
-template <typename ValueType>
-void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
-          ValueType value, std::string_view operation, std::string_view size) {
-    const Register ret{ctx.reg_alloc.Define(inst)};
-    StorageOp(ctx, binding, offset,
-              fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
-}
-} // namespace
-
-void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarS32 value) {
-    ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarS32 value) {
-    ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                            ScalarU32 value) {
-    ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                          ScalarU32 value) {
-    ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                           ScalarU32 value) {
-    ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                                ScalarU32 value) {
-    ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
-                                Register value) {
-    ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
-}
-
-void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "U32");
-}
-
-void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarS32 value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "S32");
-}
-
-void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "U32");
-}
-
-void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarS32 value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "S32");
-}
-
-void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "U32");
-}
-
-void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
-}
-
-void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
-}
-
-void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "AND", "U32");
-}
-
-void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                           ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "OR", "U32");
-}
-
-void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "XOR", "U32");
-}
-
-void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                                 ScalarU32 offset, ScalarU32 value) {
-    Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
-}
-
-void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "U64");
-}
-
-void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "S64");
-}
-
-void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "U64");
-}
-
-void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "S64");
-}
-
-void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "U64");
-}
-
-void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "AND", "U64");
-}
-
-void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                           ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "OR", "U64");
-}
-
-void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                            ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "XOR", "U64");
-}
-
-void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                                 ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
-}
-
-void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                             ScalarU32 offset, ScalarF32 value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "F32");
-}
-
-void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
-}
-
-void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
-}
-
-void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
-                               ScalarU32 offset, Register value) {
-    Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
-}
-
-void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
-                               [[maybe_unused]] const IR::Value& binding,
-                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicIAdd32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMin32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMin32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMax32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMax32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicInc32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicDec32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAnd32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicOr32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicXor32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicExchange32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicIAdd64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMin64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMin64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicSMax64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicUMax64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicInc64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicDec64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAnd64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicOr64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicXor64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicExchange64(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF32(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicAddF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMinF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMinF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMaxF16x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-
-void EmitGlobalAtomicMaxF32x2(EmitContext&) {
-    throw NotImplementedException("GLASM instruction");
-}
-} // namespace Shader::Backend::GLASM
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 26b03587e..90dbb80d2 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -8,6 +8,7 @@
 #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 
 namespace Shader::Backend::GLASM {
 namespace {
@@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset,
     }
 }
 
-void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr,
+void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr,
                      std::string_view else_expr = {}) {
     const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()};
     for (size_t index = 0; index < num_buffers; ++index) {
@@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
44 "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 45 "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1
45 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1 46 "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1
46 "AND.U.CC RC.x,RC.x,RC.y;" 47 "AND.U.CC RC.x,RC.x,RC.y;"
47 "IF NE.x;" // a && b 48 "IF NE.x;" // a && b
48 "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr 49 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
49 "PK64.U DC.y,c[{}];" // host_ssbo = cbuf
50 "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
51 "{}"
52 "ELSE;",
53 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, 50 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
54 address, address, index, then_expr); 51 address, address);
52 if (pointer_based) {
53 ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
54 "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
55 "{}"
56 "ELSE;",
57 index, expr);
58 } else {
59 ctx.Add("CVT.U32.U64 RC.x,DC.x;"
60 "{},ssbo{}[RC.x];"
61 "ELSE;",
62 expr, index);
63 }
55 } 64 }
56 if (!else_expr.empty()) { 65 if (!else_expr.empty()) {
57 ctx.Add("{}", else_expr); 66 ctx.Add("{}", else_expr);
@@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e
 template <typename ValueType>
 void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value,
            std::string_view size) {
-    StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset);
+    } else {
+        StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value));
+    }
 }
 
 void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
           std::string_view size) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
-              fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset);
+    } else {
+        StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret),
+                  fmt::format("MOV.U {},{{0,0,0,0}};", ret));
+    }
 }
 
 template <typename ValueType>
 void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) {
-    GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value));
+    } else {
+        GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value));
+    }
 }
 
 void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) {
     const Register ret{ctx.reg_alloc.Define(inst)};
-    GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret),
-                    fmt::format("MOV.S {},0;", ret));
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret));
+    } else {
+        GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret),
+                        fmt::format("MOV.S {},0;", ret));
+    }
+}
+
+template <typename ValueType>
+void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset,
+          ValueType value, std::string_view operation, std::string_view size) {
+    const Register ret{ctx.reg_alloc.Define(inst)};
+    if (ctx.runtime_info.glasm_use_storage_buffers) {
+        ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(),
+                offset);
+    } else {
+        StorageOp(ctx, binding, offset,
+                  fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value));
+    }
 }
 } // Anonymous namespace
 
@@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o
     Write(ctx, binding, offset, value, "U32X4");
 }
 
+void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarS32 value) {
+    ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                            ScalarU32 value) {
+    ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                          ScalarU32 value) {
+    ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                           ScalarU32 value) {
+    ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                ScalarU32 value) {
+    ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset,
+                                Register value) {
+    ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset);
+}
+
+void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "U32");
+}
+
+void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "S32");
+}
+
+void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "U32");
+}
+
+void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarS32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "S32");
+}
+
+void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "U32");
+}
+
+void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "IWRAP", "U32");
+}
+
+void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "DWRAP", "U32");
+}
+
+void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "AND", "U32");
+}
+
+void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                           ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "OR", "U32");
+}
+
+void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "XOR", "U32");
+}
+
+void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                 ScalarU32 offset, ScalarU32 value) {
+    Atom(ctx, inst, binding, offset, value, "EXCH", "U32");
+}
+
+void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "U64");
+}
+
+void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "S64");
+}
+
+void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "U64");
+}
+
+void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "S64");
+}
+
+void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "U64");
+}
+
+void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "AND", "U64");
+}
+
+void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                           ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "OR", "U64");
+}
+
+void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                            ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "XOR", "U64");
+}
+
+void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                                 ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "EXCH", "U64");
+}
+
+void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                             ScalarU32 offset, ScalarF32 value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "F32");
+}
+
+void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "ADD", "F16x2");
+}
+
+void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MIN", "F16x2");
+}
+
+void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
+                               ScalarU32 offset, Register value) {
+    Atom(ctx, inst, binding, offset, value, "MAX", "F16x2");
+}
+
+void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst,
+                               [[maybe_unused]] const IR::Value& binding,
+                               [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicIAdd64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMin64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMin64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicSMax64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicUMax64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicInc64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicDec64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAnd64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicOr64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicXor64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicExchange64(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicAddF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMinF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF16x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
+void EmitGlobalAtomicMaxF32x2(EmitContext&) {
+    throw NotImplementedException("GLASM instruction");
+}
+
 } // namespace Shader::Backend::GLASM
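For illustration: with glasm_use_storage_buffers set, a 32-bit storage load compiles to a single LDB access, while the bindless fallback keeps the manual bounds check emitted by StorageOp. A sketch assembled from the format strings above, assuming binding 0 and illustrative registers R0/R1:

    LDB.U32 R0,ssbo0[R1];

versus:

    PK64.U DC,c[0];
    CVT.U64.U32 DC.z,R1;
    ADD.U64 DC.x,DC.x,DC.z;
    SLT.U.CC RC.x,R1,c[0].z;
    IF NE.x;LOAD.U32 R0,DC.x;ELSE;MOV.U R0,{0,0,0,0};ENDIF;

Stores and atomics follow the same split: STB/ATOMB against ssboN[] when the flag is set, STORE/ATOM against the computed pointer otherwise.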
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index c46452c3d..f8913bf14 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -111,7 +111,10 @@ struct RuntimeInfo {
     std::optional<CompareFunction> alpha_test_func;
     float alpha_test_reference{};
 
+    // Static y negate value
     bool y_negate{};
+    // Use storage buffers instead of global pointers on GLASM
+    bool glasm_use_storage_buffers{};
 
     std::vector<TransformFeedbackVarying> xfb_varyings;
 };
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2d0ef1307..334ed470f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf
 
 void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer,
                                            u32 offset, u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
+        const GLuint base_binding = graphics_base_storage_bindings[stage];
+        const GLuint binding = base_binding + binding_index;
+        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
+                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+    } else {
         const BindlessSSBO ssbo{
             .address = buffer.HostGpuAddr() + offset,
             .length = static_cast<GLsizei>(size),
@@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else {
-        const GLuint base_binding = graphics_base_storage_bindings[stage];
-        const GLuint binding = base_binding + binding_index;
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }
 
 void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset,
                                                   u32 size, bool is_written) {
-    if (use_assembly_shaders) {
+    if (use_storage_buffers) {
+        if (size != 0) {
+            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
+                              static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
+        } else {
+            glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
+        }
+    } else {
         const BindlessSSBO ssbo{
             .address = buffer.HostGpuAddr() + offset,
             .length = static_cast<GLsizei>(size),
@@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf
         buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY);
         glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1,
                                         reinterpret_cast<const GLuint*>(&ssbo));
-    } else if (size == 0) {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0);
-    } else {
-        glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(),
-                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
     }
 }
 
233 235
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4986c65fd..bc16abafb 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -147,6 +147,10 @@ public:
         image_handles = image_handles_;
     }
 
+    void SetEnableStorageBuffers(bool use_storage_buffers_) {
+        use_storage_buffers = use_storage_buffers_;
+    }
+
 private:
     static constexpr std::array PABO_LUT{
         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -160,6 +164,8 @@ private:
     bool use_assembly_shaders = false;
     bool has_unified_vertex_buffers = false;
 
+    bool use_storage_buffers = false;
+
     u32 max_attributes = 0;
 
     std::array<GLuint, 5> graphics_base_uniform_bindings{};
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 700ebd8b8..5cf5f97a9 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -17,6 +17,15 @@ using VideoCommon::ImageId;
 constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 16;
 
+template <typename Range>
+u32 AccumulateCount(const Range& range) {
+    u32 num{};
+    for (const auto& desc : range) {
+        num += desc.count;
+    }
+    return num;
+}
+
 size_t ComputePipelineKey::Hash() const noexcept {
     return static_cast<size_t>(
         Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
@@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep
     return std::memcmp(this, &rhs, sizeof *this) == 0;
 }
 
-ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                                 Tegra::MemoryManager& gpu_memory_,
+ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_,
+                                 BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                  Tegra::Engines::KeplerCompute& kepler_compute_,
                                  ProgramManager& program_manager_, const Shader::Info& info_,
                                  OGLProgram source_program_, OGLAssemblyProgram assembly_program_)
     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
       kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_},
       source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} {
-    for (const auto& desc : info.texture_buffer_descriptors) {
-        num_texture_buffers += desc.count;
-    }
-    for (const auto& desc : info.image_buffer_descriptors) {
-        num_image_buffers += desc.count;
-    }
-    u32 num_textures = num_texture_buffers;
-    for (const auto& desc : info.texture_descriptors) {
-        num_textures += desc.count;
-    }
+
+    num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors);
+    num_image_buffers = AccumulateCount(info.image_buffer_descriptors);
+
+    const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)};
     ASSERT(num_textures <= MAX_TEXTURES);
 
-    u32 num_images = num_image_buffers;
-    for (const auto& desc : info.image_descriptors) {
-        num_images += desc.count;
-    }
+    const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)};
     ASSERT(num_images <= MAX_IMAGES);
+
+    const bool is_glasm{assembly_program.handle != 0};
+    const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)};
+    use_storage_buffers =
+        !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks();
+    writes_global_memory = !use_storage_buffers &&
+                           std::ranges::any_of(info.storage_buffers_descriptors,
+                                               [](const auto& desc) { return desc.is_written; });
 }
 
 void ComputePipeline::Configure() {
@@ -150,6 +159,7 @@ void ComputePipeline::Configure() {
 
     buffer_cache.UpdateComputeBuffers();
 
+    buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
     buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
     buffer_cache.BindHostComputeBuffers();
 
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index e3b94e2f3..dd6b62ef2 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -28,6 +28,7 @@ struct Info;
 
 namespace OpenGL {
 
+class Device;
 class ProgramManager;
 
 struct ComputePipelineKey {
@@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>);
 
 class ComputePipeline {
 public:
-    explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                             Tegra::MemoryManager& gpu_memory_,
+    explicit ComputePipeline(const Device& device, TextureCache& texture_cache_,
+                             BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              ProgramManager& program_manager_, const Shader::Info& info_,
                              OGLProgram source_program_, OGLAssemblyProgram assembly_program_);
 
     void Configure();
 
+    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+        return writes_global_memory;
+    }
+
 private:
     TextureCache& texture_cache;
     BufferCache& buffer_cache;
@@ -70,6 +75,9 @@ private:
 
     u32 num_texture_buffers{};
     u32 num_image_buffers{};
+
+    bool use_storage_buffers{};
+    bool writes_global_memory{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 18bbc4c1f..01da2bb57 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -135,13 +135,13 @@ Device::Device() {
135 "Beta driver 443.24 is known to have issues. There might be performance issues."); 135 "Beta driver 443.24 is known to have issues. There might be performance issues.");
136 disable_fast_buffer_sub_data = true; 136 disable_fast_buffer_sub_data = true;
137 } 137 }
138
139 max_uniform_buffers = BuildMaxUniformBuffers(); 138 max_uniform_buffers = BuildMaxUniformBuffers();
140 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 139 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
141 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 140 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
142 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 141 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
143 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 142 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
144 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); 143 max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
144 max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
145 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && 145 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
146 GLAD_GL_NV_shader_thread_shuffle; 146 GLAD_GL_NV_shader_thread_shuffle;
147 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 147 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
@@ -236,22 +236,6 @@ std::string Device::GetVendorName() const {
     return vendor_name;
 }
 
-Device::Device(std::nullptr_t) {
-    max_uniform_buffers.fill(std::numeric_limits<u32>::max());
-    uniform_buffer_alignment = 4;
-    shader_storage_alignment = 4;
-    max_vertex_attributes = 16;
-    max_varyings = 15;
-    max_compute_shared_memory_size = 0x10000;
-    has_warp_intrinsics = true;
-    has_shader_ballot = true;
-    has_vertex_viewport_layer = true;
-    has_image_load_formatted = true;
-    has_texture_shadow_lod = true;
-    has_variable_aoffi = true;
-    has_depth_buffer_float = true;
-}
-
 bool Device::TestVariableAoffi() {
     return TestProgram(R"(#version 430 core
 // This is a unit test, please ignore me on apitrace bug reports.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 152a3acd3..d67f5693c 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -13,7 +13,6 @@ namespace OpenGL {
 class Device {
 public:
     explicit Device();
-    explicit Device(std::nullptr_t);
 
     [[nodiscard]] std::string GetVendorName() const;
 
@@ -41,6 +40,10 @@ public:
         return max_compute_shared_memory_size;
     }
 
+    u32 GetMaxGLASMStorageBufferBlocks() const {
+        return max_glasm_storage_buffer_blocks;
+    }
+
     bool HasWarpIntrinsics() const {
         return has_warp_intrinsics;
     }
@@ -124,6 +127,7 @@ private:
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     u32 max_compute_shared_memory_size{};
+    u32 max_glasm_storage_buffer_blocks{};
     bool has_warp_intrinsics{};
     bool has_shader_ballot{};
     bool has_vertex_viewport_layer{};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index 32df35202..19d85c482 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64;
 constexpr u32 MAX_IMAGES = 8;
 
 template <typename Range>
-u32 AccumulateCount(Range&& range) {
+u32 AccumulateCount(const Range& range) {
     u32 num{};
     for (const auto& desc : range) {
         num += desc.count;
@@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc
     return std::memcmp(this, &rhs, Size()) == 0;
 }
 
-GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                                   Tegra::MemoryManager& gpu_memory_,
+GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+                                   BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                                    Tegra::Engines::Maxwell3D& maxwell3d_,
                                    ProgramManager& program_manager_, StateTracker& state_tracker_,
                                    OGLProgram program_,
@@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
     }
     u32 num_textures{};
     u32 num_images{};
+    u32 num_storage_buffers{};
     for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
         const auto& info{stage_infos[stage]};
         if (stage < 4) {
@@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu
 
         num_textures += AccumulateCount(info.texture_descriptors);
         num_images += AccumulateCount(info.image_descriptors);
+        num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
+
+        writes_global_memory |= std::ranges::any_of(
+            info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
     }
     ASSERT(num_textures <= MAX_TEXTURES);
     ASSERT(num_images <= MAX_IMAGES);
 
-    if (assembly_programs[0].handle != 0 && xfb_state) {
+    const bool assembly_shaders{assembly_programs[0].handle != 0};
+    use_storage_buffers =
+        !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+    writes_global_memory &= !use_storage_buffers;
+
+    if (assembly_shaders && xfb_state) {
         GenerateTransformFeedbackState(*xfb_state);
     }
 }
@@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) {
 
     buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
     buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
+    buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
 
     const auto& regs{maxwell3d.regs};
     const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 62f700cf5..c1113e180 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -20,6 +20,7 @@
 
 namespace OpenGL {
 
+class Device;
 class ProgramManager;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>);
 
 class GraphicsPipeline {
 public:
-    explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                              Tegra::MemoryManager& gpu_memory_,
+    explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_,
+                              BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
                               Tegra::Engines::Maxwell3D& maxwell3d_,
                               ProgramManager& program_manager_, StateTracker& state_tracker_,
                              OGLProgram program_,
@@ -77,6 +78,10 @@ public:
         }
     }
 
+    [[nodiscard]] bool WritesGlobalMemory() const noexcept {
+        return writes_global_memory;
+    }
+
 private:
     void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
 
@@ -99,6 +104,9 @@ private:
     std::array<u32, 5> num_texture_buffers{};
     std::array<u32, 5> num_image_buffers{};
 
+    bool use_storage_buffers{};
+    bool writes_global_memory{};
+
     static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
     GLsizei num_xfb_attribs{};
     GLsizei num_xfb_strides{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eec01e8c2..5d4e80364 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     EndTransformFeedback();
 
     ++num_queued_commands;
+    has_written_global_memory |= pipeline->WritesGlobalMemory();
 
     gpu.TickWork();
 }
 
 void RasterizerOpenGL::DispatchCompute() {
-    ComputePipeline* const program{shader_cache.CurrentComputePipeline()};
-    if (!program) {
+    ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
+    if (!pipeline) {
         return;
     }
-    program->Configure();
+    pipeline->Configure();
     const auto& qmd{kepler_compute.launch_description};
     glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
     ++num_queued_commands;
+    has_written_global_memory |= pipeline->WritesGlobalMemory();
 }
 
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
286void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 288void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() {
 
     // Make sure memory stored from the previous GL command stream is visible
     // This is only needed on assembly shaders where we write to GPU memory with raw pointers
-    // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used
-    // and prefer using NV_shader_storage_buffer_object when possible
-    if (Settings::values.use_assembly_shaders.GetValue()) {
+    if (has_written_global_memory) {
+        has_written_global_memory = false;
         glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
     }
     glFlush();
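
Note: FlushCommands() now pays for GL_BUFFER_UPDATE_BARRIER_BIT only when a queued draw or dispatch actually wrote global memory, rather than whenever assembly shaders are enabled. A condensed sketch of the dirty-flag pattern, assuming the member names from this diff (the helper names are hypothetical; the real logic lives in Draw, DispatchCompute, and FlushCommands):

    // Record at submission time whether the pipeline writes global memory...
    void OnQueueWork(const GraphicsPipeline& pipeline) {
        ++num_queued_commands;
        has_written_global_memory |= pipeline.WritesGlobalMemory();
    }

    // ...and consume the flag at most once per flush.
    void OnFlush() {
        if (has_written_global_memory) {
            has_written_global_memory = false;
            glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
        }
        glFlush();
    }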
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index afd43b2ee..d0397b745 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -225,7 +225,8 @@ private:
     std::array<GLuint, MAX_IMAGES> image_handles{};
 
     /// Number of commands queued to the OpenGL driver. Resetted on flush.
-    std::size_t num_queued_commands = 0;
+    size_t num_queued_commands = 0;
+    bool has_written_global_memory = false;
 
     u32 last_clip_distance_mask = 0;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3aa5ac31d..287f497b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) {
 }
 
 Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
-                                    const Shader::IR::Program& program) {
+                                    const Shader::IR::Program& program,
+                                    bool glasm_use_storage_buffers) {
     Shader::RuntimeInfo info;
     switch (program.stage) {
     case Shader::Stage::TessellationEval:
@@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
         info.input_topology = Shader::InputTopology::TrianglesAdjacency;
         break;
     }
+    info.glasm_use_storage_buffers = glasm_use_storage_buffers;
     return info;
 }
 
@@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
     ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs,
     bool build_in_parallel) {
     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
-    size_t env_index{0};
+    size_t env_index{};
+    u32 total_storage_buffers{};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         if (key.unique_hashes[index] == 0) {
@@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
+
+        for (const auto& desc : programs[index].info.storage_buffers_descriptors) {
+            total_storage_buffers += desc.count;
+        }
     }
+    const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()};
+    const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit};
+
     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
 
     OGLProgram source_program;
@@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         const size_t stage_index{index - 1};
         infos[stage_index] = &program.info;
 
-        const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)};
+        const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)};
         if (device.UseAssemblyShaders()) {
             const std::string code{EmitGLASM(profile, runtime_info, program, binding)};
             assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index));
@@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
         LinkProgram(source_program.handle);
     }
     return std::make_unique<GraphicsPipeline>(
-        texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
+        device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
         std::move(source_program), std::move(assembly_programs), infos,
         key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
 }
@@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
 
     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
+
+    u32 num_storage_buffers{};
+    for (const auto& desc : program.info.storage_buffers_descriptors) {
+        num_storage_buffers += desc.count;
+    }
+    Shader::RuntimeInfo info;
+    info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
+
     OGLAssemblyProgram asm_program;
     OGLProgram source_program;
     if (device.UseAssemblyShaders()) {
-        const std::string code{EmitGLASM(profile, program)};
+        const std::string code{EmitGLASM(profile, info, program)};
         asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
     } else {
         const std::vector<u32> code{EmitSPIRV(profile, program)};
@@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
         AddShader(GL_COMPUTE_SHADER, source_program.handle, code);
         LinkProgram(source_program.handle);
     }
-    return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory,
+    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
                                              kepler_compute, program_manager, program.info,
                                              std::move(source_program), std::move(asm_program));
 }
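
Note: both CreateGraphicsPipeline and CreateComputePipeline accumulate storage-buffer descriptor counts before any code is emitted, so the decision is available when building Shader::RuntimeInfo. A hedged sketch of that accumulation (the helper is hypothetical; storage_buffers_descriptors and desc.count follow this diff):

    // Sum descriptor counts across a program's storage buffers;
    // a descriptor with count > 1 represents a descriptor array.
    u32 CountStorageBuffers(const Shader::Info& info) {
        u32 total{};
        for (const auto& desc : info.storage_buffers_descriptors) {
            total += desc.count;
        }
        return total;
    }

The total is then compared against device.GetMaxGLASMStorageBufferBlocks() to set RuntimeInfo::glasm_use_storage_buffers, which EmitGLASM consults when choosing between STORAGE bindings and raw global-memory access.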