diff options
Diffstat (limited to '')
17 files changed, 503 insertions, 437 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index becdb7d54..d6d8e5f59 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -4,7 +4,6 @@ add_library(shader_recompiler STATIC | |||
| 4 | backend/glasm/emit_context.h | 4 | backend/glasm/emit_context.h |
| 5 | backend/glasm/emit_glasm.cpp | 5 | backend/glasm/emit_glasm.cpp |
| 6 | backend/glasm/emit_glasm.h | 6 | backend/glasm/emit_glasm.h |
| 7 | backend/glasm/emit_glasm_atomic.cpp | ||
| 8 | backend/glasm/emit_glasm_barriers.cpp | 7 | backend/glasm/emit_glasm_barriers.cpp |
| 9 | backend/glasm/emit_glasm_bitwise_conversion.cpp | 8 | backend/glasm/emit_glasm_bitwise_conversion.cpp |
| 10 | backend/glasm/emit_glasm_composite.cpp | 9 | backend/glasm/emit_glasm_composite.cpp |
diff --git a/src/shader_recompiler/backend/glasm/emit_context.cpp b/src/shader_recompiler/backend/glasm/emit_context.cpp index b5b0e2204..e18526816 100644 --- a/src/shader_recompiler/backend/glasm/emit_context.cpp +++ b/src/shader_recompiler/backend/glasm/emit_context.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "shader_recompiler/backend/bindings.h" | 7 | #include "shader_recompiler/backend/bindings.h" |
| 8 | #include "shader_recompiler/backend/glasm/emit_context.h" | 8 | #include "shader_recompiler/backend/glasm/emit_context.h" |
| 9 | #include "shader_recompiler/frontend/ir/program.h" | 9 | #include "shader_recompiler/frontend/ir/program.h" |
| 10 | #include "shader_recompiler/profile.h" | ||
| 10 | 11 | ||
| 11 | namespace Shader::Backend::GLASM { | 12 | namespace Shader::Backend::GLASM { |
| 12 | namespace { | 13 | namespace { |
| @@ -40,13 +41,21 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 40 | Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); | 41 | Add("CBUFFER c{}[]={{program.buffer[{}]}};", desc.index, cbuf_index); |
| 41 | ++cbuf_index; | 42 | ++cbuf_index; |
| 42 | } | 43 | } |
| 44 | u32 ssbo_index{}; | ||
| 43 | for (const auto& desc : info.storage_buffers_descriptors) { | 45 | for (const auto& desc : info.storage_buffers_descriptors) { |
| 44 | if (desc.count != 1) { | 46 | if (desc.count != 1) { |
| 45 | throw NotImplementedException("Storage buffer descriptor array"); | 47 | throw NotImplementedException("Storage buffer descriptor array"); |
| 46 | } | 48 | } |
| 49 | if (runtime_info.glasm_use_storage_buffers) { | ||
| 50 | Add("STORAGE ssbo{}[]={{program.storage[{}]}};", ssbo_index, bindings.storage_buffer); | ||
| 51 | ++bindings.storage_buffer; | ||
| 52 | ++ssbo_index; | ||
| 53 | } | ||
| 47 | } | 54 | } |
| 48 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { | 55 | if (!runtime_info.glasm_use_storage_buffers) { |
| 49 | Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); | 56 | if (const size_t num = info.storage_buffers_descriptors.size(); num > 0) { |
| 57 | Add("PARAM c[{}]={{program.local[0..{}]}};", num, num - 1); | ||
| 58 | } | ||
| 50 | } | 59 | } |
| 51 | stage = program.stage; | 60 | stage = program.stage; |
| 52 | switch (program.stage) { | 61 | switch (program.stage) { |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.h b/src/shader_recompiler/backend/glasm/emit_glasm.h index 3d02d873e..3df32a4a6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.h +++ b/src/shader_recompiler/backend/glasm/emit_glasm.h | |||
| @@ -15,9 +15,10 @@ namespace Shader::Backend::GLASM { | |||
| 15 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, | 15 | [[nodiscard]] std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, |
| 16 | IR::Program& program, Bindings& bindings); | 16 | IR::Program& program, Bindings& bindings); |
| 17 | 17 | ||
| 18 | [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, IR::Program& program) { | 18 | [[nodiscard]] inline std::string EmitGLASM(const Profile& profile, const RuntimeInfo& runtime_info, |
| 19 | IR::Program& program) { | ||
| 19 | Bindings binding; | 20 | Bindings binding; |
| 20 | return EmitGLASM(profile, {}, program, binding); | 21 | return EmitGLASM(profile, runtime_info, program, binding); |
| 21 | } | 22 | } |
| 22 | 23 | ||
| 23 | } // namespace Shader::Backend::GLASM | 24 | } // namespace Shader::Backend::GLASM |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp deleted file mode 100644 index e72b252a3..000000000 --- a/src/shader_recompiler/backend/glasm/emit_glasm_atomic.cpp +++ /dev/null | |||
| @@ -1,351 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/glasm/emit_context.h" | ||
| 6 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 8 | |||
| 9 | namespace Shader::Backend::GLASM { | ||
| 10 | namespace { | ||
| 11 | void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | ||
| 12 | std::string_view then_expr, std::string_view else_expr = {}) { | ||
| 13 | // Operate on bindless SSBO, call the expression with bounds checking | ||
| 14 | // address = c[binding].xy | ||
| 15 | // length = c[binding].z | ||
| 16 | const u32 sb_binding{binding.U32()}; | ||
| 17 | ctx.Add("PK64.U DC,c[{}];" // pointer = address | ||
| 18 | "CVT.U64.U32 DC.z,{};" // offset = uint64_t(offset) | ||
| 19 | "ADD.U64 DC.x,DC.x,DC.z;" // pointer += offset | ||
| 20 | "SLT.U.CC RC.x,{},c[{}].z;", // cc = offset < length | ||
| 21 | sb_binding, offset, offset, sb_binding); | ||
| 22 | if (else_expr.empty()) { | ||
| 23 | ctx.Add("IF NE.x;{}ENDIF;", then_expr); | ||
| 24 | } else { | ||
| 25 | ctx.Add("IF NE.x;{}ELSE;{}ENDIF;", then_expr, else_expr); | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | template <typename ValueType> | ||
| 30 | void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 31 | ValueType value, std::string_view operation, std::string_view size) { | ||
| 32 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 33 | StorageOp(ctx, binding, offset, | ||
| 34 | fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); | ||
| 35 | } | ||
| 36 | } // namespace | ||
| 37 | |||
| 38 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 39 | ScalarU32 value) { | ||
| 40 | ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 41 | } | ||
| 42 | |||
| 43 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 44 | ScalarS32 value) { | ||
| 45 | ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 46 | } | ||
| 47 | |||
| 48 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 49 | ScalarU32 value) { | ||
| 50 | ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 51 | } | ||
| 52 | |||
| 53 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 54 | ScalarS32 value) { | ||
| 55 | ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 56 | } | ||
| 57 | |||
| 58 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 59 | ScalarU32 value) { | ||
| 60 | ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 61 | } | ||
| 62 | |||
| 63 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 64 | ScalarU32 value) { | ||
| 65 | ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 66 | } | ||
| 67 | |||
| 68 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 69 | ScalarU32 value) { | ||
| 70 | ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 71 | } | ||
| 72 | |||
| 73 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 74 | ScalarU32 value) { | ||
| 75 | ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 76 | } | ||
| 77 | |||
| 78 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 79 | ScalarU32 value) { | ||
| 80 | ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 81 | } | ||
| 82 | |||
| 83 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 84 | ScalarU32 value) { | ||
| 85 | ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 86 | } | ||
| 87 | |||
| 88 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 89 | ScalarU32 value) { | ||
| 90 | ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 91 | } | ||
| 92 | |||
| 93 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 94 | Register value) { | ||
| 95 | ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 96 | } | ||
| 97 | |||
| 98 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 99 | ScalarU32 offset, ScalarU32 value) { | ||
| 100 | Atom(ctx, inst, binding, offset, value, "ADD", "U32"); | ||
| 101 | } | ||
| 102 | |||
| 103 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 104 | ScalarU32 offset, ScalarS32 value) { | ||
| 105 | Atom(ctx, inst, binding, offset, value, "MIN", "S32"); | ||
| 106 | } | ||
| 107 | |||
| 108 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 109 | ScalarU32 offset, ScalarU32 value) { | ||
| 110 | Atom(ctx, inst, binding, offset, value, "MIN", "U32"); | ||
| 111 | } | ||
| 112 | |||
| 113 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 114 | ScalarU32 offset, ScalarS32 value) { | ||
| 115 | Atom(ctx, inst, binding, offset, value, "MAX", "S32"); | ||
| 116 | } | ||
| 117 | |||
| 118 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 119 | ScalarU32 offset, ScalarU32 value) { | ||
| 120 | Atom(ctx, inst, binding, offset, value, "MAX", "U32"); | ||
| 121 | } | ||
| 122 | |||
| 123 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 124 | ScalarU32 offset, ScalarU32 value) { | ||
| 125 | Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); | ||
| 126 | } | ||
| 127 | |||
| 128 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 129 | ScalarU32 offset, ScalarU32 value) { | ||
| 130 | Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); | ||
| 131 | } | ||
| 132 | |||
| 133 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 134 | ScalarU32 offset, ScalarU32 value) { | ||
| 135 | Atom(ctx, inst, binding, offset, value, "AND", "U32"); | ||
| 136 | } | ||
| 137 | |||
| 138 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 139 | ScalarU32 offset, ScalarU32 value) { | ||
| 140 | Atom(ctx, inst, binding, offset, value, "OR", "U32"); | ||
| 141 | } | ||
| 142 | |||
| 143 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 144 | ScalarU32 offset, ScalarU32 value) { | ||
| 145 | Atom(ctx, inst, binding, offset, value, "XOR", "U32"); | ||
| 146 | } | ||
| 147 | |||
| 148 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 149 | ScalarU32 offset, ScalarU32 value) { | ||
| 150 | Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); | ||
| 151 | } | ||
| 152 | |||
| 153 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 154 | ScalarU32 offset, Register value) { | ||
| 155 | Atom(ctx, inst, binding, offset, value, "ADD", "U64"); | ||
| 156 | } | ||
| 157 | |||
| 158 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 159 | ScalarU32 offset, Register value) { | ||
| 160 | Atom(ctx, inst, binding, offset, value, "MIN", "S64"); | ||
| 161 | } | ||
| 162 | |||
| 163 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 164 | ScalarU32 offset, Register value) { | ||
| 165 | Atom(ctx, inst, binding, offset, value, "MIN", "U64"); | ||
| 166 | } | ||
| 167 | |||
| 168 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 169 | ScalarU32 offset, Register value) { | ||
| 170 | Atom(ctx, inst, binding, offset, value, "MAX", "S64"); | ||
| 171 | } | ||
| 172 | |||
| 173 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 174 | ScalarU32 offset, Register value) { | ||
| 175 | Atom(ctx, inst, binding, offset, value, "MAX", "U64"); | ||
| 176 | } | ||
| 177 | |||
| 178 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 179 | ScalarU32 offset, Register value) { | ||
| 180 | Atom(ctx, inst, binding, offset, value, "AND", "U64"); | ||
| 181 | } | ||
| 182 | |||
| 183 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 184 | ScalarU32 offset, Register value) { | ||
| 185 | Atom(ctx, inst, binding, offset, value, "OR", "U64"); | ||
| 186 | } | ||
| 187 | |||
| 188 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 189 | ScalarU32 offset, Register value) { | ||
| 190 | Atom(ctx, inst, binding, offset, value, "XOR", "U64"); | ||
| 191 | } | ||
| 192 | |||
| 193 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 194 | ScalarU32 offset, Register value) { | ||
| 195 | Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); | ||
| 196 | } | ||
| 197 | |||
| 198 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 199 | ScalarU32 offset, ScalarF32 value) { | ||
| 200 | Atom(ctx, inst, binding, offset, value, "ADD", "F32"); | ||
| 201 | } | ||
| 202 | |||
| 203 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 204 | ScalarU32 offset, Register value) { | ||
| 205 | Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); | ||
| 206 | } | ||
| 207 | |||
| 208 | void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 209 | [[maybe_unused]] const IR::Value& binding, | ||
| 210 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 211 | throw NotImplementedException("GLASM instruction"); | ||
| 212 | } | ||
| 213 | |||
| 214 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 215 | ScalarU32 offset, Register value) { | ||
| 216 | Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); | ||
| 217 | } | ||
| 218 | |||
| 219 | void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 220 | [[maybe_unused]] const IR::Value& binding, | ||
| 221 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 222 | throw NotImplementedException("GLASM instruction"); | ||
| 223 | } | ||
| 224 | |||
| 225 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 226 | ScalarU32 offset, Register value) { | ||
| 227 | Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); | ||
| 228 | } | ||
| 229 | |||
| 230 | void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 231 | [[maybe_unused]] const IR::Value& binding, | ||
| 232 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 233 | throw NotImplementedException("GLASM instruction"); | ||
| 234 | } | ||
| 235 | |||
| 236 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 237 | throw NotImplementedException("GLASM instruction"); | ||
| 238 | } | ||
| 239 | |||
| 240 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 241 | throw NotImplementedException("GLASM instruction"); | ||
| 242 | } | ||
| 243 | |||
| 244 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 245 | throw NotImplementedException("GLASM instruction"); | ||
| 246 | } | ||
| 247 | |||
| 248 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 249 | throw NotImplementedException("GLASM instruction"); | ||
| 250 | } | ||
| 251 | |||
| 252 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 253 | throw NotImplementedException("GLASM instruction"); | ||
| 254 | } | ||
| 255 | |||
| 256 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 257 | throw NotImplementedException("GLASM instruction"); | ||
| 258 | } | ||
| 259 | |||
| 260 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 261 | throw NotImplementedException("GLASM instruction"); | ||
| 262 | } | ||
| 263 | |||
| 264 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 265 | throw NotImplementedException("GLASM instruction"); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 269 | throw NotImplementedException("GLASM instruction"); | ||
| 270 | } | ||
| 271 | |||
| 272 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 273 | throw NotImplementedException("GLASM instruction"); | ||
| 274 | } | ||
| 275 | |||
| 276 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 277 | throw NotImplementedException("GLASM instruction"); | ||
| 278 | } | ||
| 279 | |||
| 280 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 281 | throw NotImplementedException("GLASM instruction"); | ||
| 282 | } | ||
| 283 | |||
| 284 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 285 | throw NotImplementedException("GLASM instruction"); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 289 | throw NotImplementedException("GLASM instruction"); | ||
| 290 | } | ||
| 291 | |||
| 292 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 293 | throw NotImplementedException("GLASM instruction"); | ||
| 294 | } | ||
| 295 | |||
| 296 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 297 | throw NotImplementedException("GLASM instruction"); | ||
| 298 | } | ||
| 299 | |||
| 300 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 301 | throw NotImplementedException("GLASM instruction"); | ||
| 302 | } | ||
| 303 | |||
| 304 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 305 | throw NotImplementedException("GLASM instruction"); | ||
| 306 | } | ||
| 307 | |||
| 308 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 309 | throw NotImplementedException("GLASM instruction"); | ||
| 310 | } | ||
| 311 | |||
| 312 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 313 | throw NotImplementedException("GLASM instruction"); | ||
| 314 | } | ||
| 315 | |||
| 316 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 317 | throw NotImplementedException("GLASM instruction"); | ||
| 318 | } | ||
| 319 | |||
| 320 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 321 | throw NotImplementedException("GLASM instruction"); | ||
| 322 | } | ||
| 323 | |||
| 324 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 325 | throw NotImplementedException("GLASM instruction"); | ||
| 326 | } | ||
| 327 | |||
| 328 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 329 | throw NotImplementedException("GLASM instruction"); | ||
| 330 | } | ||
| 331 | |||
| 332 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 333 | throw NotImplementedException("GLASM instruction"); | ||
| 334 | } | ||
| 335 | |||
| 336 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 337 | throw NotImplementedException("GLASM instruction"); | ||
| 338 | } | ||
| 339 | |||
| 340 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 341 | throw NotImplementedException("GLASM instruction"); | ||
| 342 | } | ||
| 343 | |||
| 344 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 345 | throw NotImplementedException("GLASM instruction"); | ||
| 346 | } | ||
| 347 | |||
| 348 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 349 | throw NotImplementedException("GLASM instruction"); | ||
| 350 | } | ||
| 351 | } // namespace Shader::Backend::GLASM | ||
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp index 26b03587e..90dbb80d2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" | 8 | #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" |
| 9 | #include "shader_recompiler/frontend/ir/program.h" | 9 | #include "shader_recompiler/frontend/ir/program.h" |
| 10 | #include "shader_recompiler/frontend/ir/value.h" | 10 | #include "shader_recompiler/frontend/ir/value.h" |
| 11 | #include "shader_recompiler/profile.h" | ||
| 11 | 12 | ||
| 12 | namespace Shader::Backend::GLASM { | 13 | namespace Shader::Backend::GLASM { |
| 13 | namespace { | 14 | namespace { |
| @@ -29,7 +30,7 @@ void StorageOp(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, | |||
| 29 | } | 30 | } |
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_expr, | 33 | void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std::string_view expr, |
| 33 | std::string_view else_expr = {}) { | 34 | std::string_view else_expr = {}) { |
| 34 | const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; | 35 | const size_t num_buffers{ctx.info.storage_buffers_descriptors.size()}; |
| 35 | for (size_t index = 0; index < num_buffers; ++index) { | 36 | for (size_t index = 0; index < num_buffers; ++index) { |
| @@ -44,14 +45,22 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e | |||
| 44 | "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 | 45 | "SGE.U64 RC.x,{}.x,DC.x;" // a = input_addr >= ssbo_addr ? -1 : 1 |
| 45 | "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1 | 46 | "SLT.U64 RC.y,{}.x,DC.y;" // b = input_addr < ssbo_end ? -1 : 1 |
| 46 | "AND.U.CC RC.x,RC.x,RC.y;" | 47 | "AND.U.CC RC.x,RC.x,RC.y;" |
| 47 | "IF NE.x;" // a && b | 48 | "IF NE.x;" // a && b |
| 48 | "SUB.U64 DC.x,{}.x,DC.x;" // offset = input_addr - ssbo_addr | 49 | "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr |
| 49 | "PK64.U DC.y,c[{}];" // host_ssbo = cbuf | ||
| 50 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||
| 51 | "{}" | ||
| 52 | "ELSE;", | ||
| 53 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, | 50 | ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, |
| 54 | address, address, index, then_expr); | 51 | address, address); |
| 52 | if (pointer_based) { | ||
| 53 | ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf | ||
| 54 | "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||
| 55 | "{}" | ||
| 56 | "ELSE;", | ||
| 57 | index, expr); | ||
| 58 | } else { | ||
| 59 | ctx.Add("CVT.U32.U64 RC.x,DC.x;" | ||
| 60 | "{},ssbo{}[RC.x];" | ||
| 61 | "ELSE;", | ||
| 62 | expr, index); | ||
| 63 | } | ||
| 55 | } | 64 | } |
| 56 | if (!else_expr.empty()) { | 65 | if (!else_expr.empty()) { |
| 57 | ctx.Add("{}", else_expr); | 66 | ctx.Add("{}", else_expr); |
| @@ -64,25 +73,54 @@ void GlobalStorageOp(EmitContext& ctx, Register address, std::string_view then_e | |||
| 64 | template <typename ValueType> | 73 | template <typename ValueType> |
| 65 | void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, | 74 | void Write(EmitContext& ctx, const IR::Value& binding, ScalarU32 offset, ValueType value, |
| 66 | std::string_view size) { | 75 | std::string_view size) { |
| 67 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); | 76 | if (ctx.runtime_info.glasm_use_storage_buffers) { |
| 77 | ctx.Add("STB.{} {},ssbo{}[{}];", size, value, binding.U32(), offset); | ||
| 78 | } else { | ||
| 79 | StorageOp(ctx, binding, offset, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 80 | } | ||
| 68 | } | 81 | } |
| 69 | 82 | ||
| 70 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | 83 | void Load(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, |
| 71 | std::string_view size) { | 84 | std::string_view size) { |
| 72 | const Register ret{ctx.reg_alloc.Define(inst)}; | 85 | const Register ret{ctx.reg_alloc.Define(inst)}; |
| 73 | StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), | 86 | if (ctx.runtime_info.glasm_use_storage_buffers) { |
| 74 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | 87 | ctx.Add("LDB.{} {},ssbo{}[{}];", size, ret, binding.U32(), offset); |
| 88 | } else { | ||
| 89 | StorageOp(ctx, binding, offset, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 90 | fmt::format("MOV.U {},{{0,0,0,0}};", ret)); | ||
| 91 | } | ||
| 75 | } | 92 | } |
| 76 | 93 | ||
| 77 | template <typename ValueType> | 94 | template <typename ValueType> |
| 78 | void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { | 95 | void GlobalWrite(EmitContext& ctx, Register address, ValueType value, std::string_view size) { |
| 79 | GlobalStorageOp(ctx, address, fmt::format("STORE.{} {},DC.x;", size, value)); | 96 | if (ctx.runtime_info.glasm_use_storage_buffers) { |
| 97 | GlobalStorageOp(ctx, address, false, fmt::format("STB.{} {}", size, value)); | ||
| 98 | } else { | ||
| 99 | GlobalStorageOp(ctx, address, true, fmt::format("STORE.{} {},DC.x;", size, value)); | ||
| 100 | } | ||
| 80 | } | 101 | } |
| 81 | 102 | ||
| 82 | void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { | 103 | void GlobalLoad(EmitContext& ctx, IR::Inst& inst, Register address, std::string_view size) { |
| 83 | const Register ret{ctx.reg_alloc.Define(inst)}; | 104 | const Register ret{ctx.reg_alloc.Define(inst)}; |
| 84 | GlobalStorageOp(ctx, address, fmt::format("LOAD.{} {},DC.x;", size, ret), | 105 | if (ctx.runtime_info.glasm_use_storage_buffers) { |
| 85 | fmt::format("MOV.S {},0;", ret)); | 106 | GlobalStorageOp(ctx, address, false, fmt::format("LDB.{} {}", size, ret)); |
| 107 | } else { | ||
| 108 | GlobalStorageOp(ctx, address, true, fmt::format("LOAD.{} {},DC.x;", size, ret), | ||
| 109 | fmt::format("MOV.S {},0;", ret)); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | template <typename ValueType> | ||
| 114 | void Atom(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU32 offset, | ||
| 115 | ValueType value, std::string_view operation, std::string_view size) { | ||
| 116 | const Register ret{ctx.reg_alloc.Define(inst)}; | ||
| 117 | if (ctx.runtime_info.glasm_use_storage_buffers) { | ||
| 118 | ctx.Add("ATOMB.{}.{} {},{},ssbo{}[{}];", operation, size, ret, value, binding.U32(), | ||
| 119 | offset); | ||
| 120 | } else { | ||
| 121 | StorageOp(ctx, binding, offset, | ||
| 122 | fmt::format("ATOM.{}.{} {},{},DC.x;", operation, size, ret, value)); | ||
| 123 | } | ||
| 86 | } | 124 | } |
| 87 | } // Anonymous namespace | 125 | } // Anonymous namespace |
| 88 | 126 | ||
| @@ -212,4 +250,318 @@ void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, ScalarU32 o | |||
| 212 | Write(ctx, binding, offset, value, "U32X4"); | 250 | Write(ctx, binding, offset, value, "U32X4"); |
| 213 | } | 251 | } |
| 214 | 252 | ||
| 253 | void EmitSharedAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 254 | ScalarU32 value) { | ||
| 255 | ctx.Add("ATOMS.ADD.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 256 | } | ||
| 257 | |||
| 258 | void EmitSharedAtomicSMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 259 | ScalarS32 value) { | ||
| 260 | ctx.Add("ATOMS.MIN.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 261 | } | ||
| 262 | |||
| 263 | void EmitSharedAtomicUMin32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 264 | ScalarU32 value) { | ||
| 265 | ctx.Add("ATOMS.MIN.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 266 | } | ||
| 267 | |||
| 268 | void EmitSharedAtomicSMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 269 | ScalarS32 value) { | ||
| 270 | ctx.Add("ATOMS.MAX.S32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 271 | } | ||
| 272 | |||
| 273 | void EmitSharedAtomicUMax32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 274 | ScalarU32 value) { | ||
| 275 | ctx.Add("ATOMS.MAX.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 276 | } | ||
| 277 | |||
| 278 | void EmitSharedAtomicInc32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 279 | ScalarU32 value) { | ||
| 280 | ctx.Add("ATOMS.IWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 281 | } | ||
| 282 | |||
| 283 | void EmitSharedAtomicDec32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 284 | ScalarU32 value) { | ||
| 285 | ctx.Add("ATOMS.DWRAP.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 286 | } | ||
| 287 | |||
| 288 | void EmitSharedAtomicAnd32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 289 | ScalarU32 value) { | ||
| 290 | ctx.Add("ATOMS.AND.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 291 | } | ||
| 292 | |||
| 293 | void EmitSharedAtomicOr32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 294 | ScalarU32 value) { | ||
| 295 | ctx.Add("ATOMS.OR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 296 | } | ||
| 297 | |||
| 298 | void EmitSharedAtomicXor32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 299 | ScalarU32 value) { | ||
| 300 | ctx.Add("ATOMS.XOR.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 301 | } | ||
| 302 | |||
| 303 | void EmitSharedAtomicExchange32(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 304 | ScalarU32 value) { | ||
| 305 | ctx.Add("ATOMS.EXCH.U32 {},{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 306 | } | ||
| 307 | |||
| 308 | void EmitSharedAtomicExchange64(EmitContext& ctx, IR::Inst& inst, ScalarU32 pointer_offset, | ||
| 309 | Register value) { | ||
| 310 | ctx.LongAdd("ATOMS.EXCH.U64 {}.x,{},shared_mem[{}];", inst, value, pointer_offset); | ||
| 311 | } | ||
| 312 | |||
| 313 | void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 314 | ScalarU32 offset, ScalarU32 value) { | ||
| 315 | Atom(ctx, inst, binding, offset, value, "ADD", "U32"); | ||
| 316 | } | ||
| 317 | |||
| 318 | void EmitStorageAtomicSMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 319 | ScalarU32 offset, ScalarS32 value) { | ||
| 320 | Atom(ctx, inst, binding, offset, value, "MIN", "S32"); | ||
| 321 | } | ||
| 322 | |||
| 323 | void EmitStorageAtomicUMin32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 324 | ScalarU32 offset, ScalarU32 value) { | ||
| 325 | Atom(ctx, inst, binding, offset, value, "MIN", "U32"); | ||
| 326 | } | ||
| 327 | |||
| 328 | void EmitStorageAtomicSMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 329 | ScalarU32 offset, ScalarS32 value) { | ||
| 330 | Atom(ctx, inst, binding, offset, value, "MAX", "S32"); | ||
| 331 | } | ||
| 332 | |||
| 333 | void EmitStorageAtomicUMax32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 334 | ScalarU32 offset, ScalarU32 value) { | ||
| 335 | Atom(ctx, inst, binding, offset, value, "MAX", "U32"); | ||
| 336 | } | ||
| 337 | |||
| 338 | void EmitStorageAtomicInc32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 339 | ScalarU32 offset, ScalarU32 value) { | ||
| 340 | Atom(ctx, inst, binding, offset, value, "IWRAP", "U32"); | ||
| 341 | } | ||
| 342 | |||
| 343 | void EmitStorageAtomicDec32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 344 | ScalarU32 offset, ScalarU32 value) { | ||
| 345 | Atom(ctx, inst, binding, offset, value, "DWRAP", "U32"); | ||
| 346 | } | ||
| 347 | |||
| 348 | void EmitStorageAtomicAnd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 349 | ScalarU32 offset, ScalarU32 value) { | ||
| 350 | Atom(ctx, inst, binding, offset, value, "AND", "U32"); | ||
| 351 | } | ||
| 352 | |||
| 353 | void EmitStorageAtomicOr32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 354 | ScalarU32 offset, ScalarU32 value) { | ||
| 355 | Atom(ctx, inst, binding, offset, value, "OR", "U32"); | ||
| 356 | } | ||
| 357 | |||
| 358 | void EmitStorageAtomicXor32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 359 | ScalarU32 offset, ScalarU32 value) { | ||
| 360 | Atom(ctx, inst, binding, offset, value, "XOR", "U32"); | ||
| 361 | } | ||
| 362 | |||
| 363 | void EmitStorageAtomicExchange32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 364 | ScalarU32 offset, ScalarU32 value) { | ||
| 365 | Atom(ctx, inst, binding, offset, value, "EXCH", "U32"); | ||
| 366 | } | ||
| 367 | |||
| 368 | void EmitStorageAtomicIAdd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 369 | ScalarU32 offset, Register value) { | ||
| 370 | Atom(ctx, inst, binding, offset, value, "ADD", "U64"); | ||
| 371 | } | ||
| 372 | |||
| 373 | void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 374 | ScalarU32 offset, Register value) { | ||
| 375 | Atom(ctx, inst, binding, offset, value, "MIN", "S64"); | ||
| 376 | } | ||
| 377 | |||
| 378 | void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 379 | ScalarU32 offset, Register value) { | ||
| 380 | Atom(ctx, inst, binding, offset, value, "MIN", "U64"); | ||
| 381 | } | ||
| 382 | |||
| 383 | void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 384 | ScalarU32 offset, Register value) { | ||
| 385 | Atom(ctx, inst, binding, offset, value, "MAX", "S64"); | ||
| 386 | } | ||
| 387 | |||
| 388 | void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 389 | ScalarU32 offset, Register value) { | ||
| 390 | Atom(ctx, inst, binding, offset, value, "MAX", "U64"); | ||
| 391 | } | ||
| 392 | |||
| 393 | void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 394 | ScalarU32 offset, Register value) { | ||
| 395 | Atom(ctx, inst, binding, offset, value, "AND", "U64"); | ||
| 396 | } | ||
| 397 | |||
| 398 | void EmitStorageAtomicOr64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 399 | ScalarU32 offset, Register value) { | ||
| 400 | Atom(ctx, inst, binding, offset, value, "OR", "U64"); | ||
| 401 | } | ||
| 402 | |||
| 403 | void EmitStorageAtomicXor64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 404 | ScalarU32 offset, Register value) { | ||
| 405 | Atom(ctx, inst, binding, offset, value, "XOR", "U64"); | ||
| 406 | } | ||
| 407 | |||
| 408 | void EmitStorageAtomicExchange64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 409 | ScalarU32 offset, Register value) { | ||
| 410 | Atom(ctx, inst, binding, offset, value, "EXCH", "U64"); | ||
| 411 | } | ||
| 412 | |||
| 413 | void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 414 | ScalarU32 offset, ScalarF32 value) { | ||
| 415 | Atom(ctx, inst, binding, offset, value, "ADD", "F32"); | ||
| 416 | } | ||
| 417 | |||
| 418 | void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 419 | ScalarU32 offset, Register value) { | ||
| 420 | Atom(ctx, inst, binding, offset, value, "ADD", "F16x2"); | ||
| 421 | } | ||
| 422 | |||
| 423 | void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 424 | [[maybe_unused]] const IR::Value& binding, | ||
| 425 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 426 | throw NotImplementedException("GLASM instruction"); | ||
| 427 | } | ||
| 428 | |||
| 429 | void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 430 | ScalarU32 offset, Register value) { | ||
| 431 | Atom(ctx, inst, binding, offset, value, "MIN", "F16x2"); | ||
| 432 | } | ||
| 433 | |||
| 434 | void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 435 | [[maybe_unused]] const IR::Value& binding, | ||
| 436 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 437 | throw NotImplementedException("GLASM instruction"); | ||
| 438 | } | ||
| 439 | |||
| 440 | void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||
| 441 | ScalarU32 offset, Register value) { | ||
| 442 | Atom(ctx, inst, binding, offset, value, "MAX", "F16x2"); | ||
| 443 | } | ||
| 444 | |||
| 445 | void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, | ||
| 446 | [[maybe_unused]] const IR::Value& binding, | ||
| 447 | [[maybe_unused]] ScalarU32 offset, [[maybe_unused]] Register value) { | ||
| 448 | throw NotImplementedException("GLASM instruction"); | ||
| 449 | } | ||
| 450 | |||
| 451 | void EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 452 | throw NotImplementedException("GLASM instruction"); | ||
| 453 | } | ||
| 454 | |||
| 455 | void EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 456 | throw NotImplementedException("GLASM instruction"); | ||
| 457 | } | ||
| 458 | |||
| 459 | void EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 460 | throw NotImplementedException("GLASM instruction"); | ||
| 461 | } | ||
| 462 | |||
| 463 | void EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 464 | throw NotImplementedException("GLASM instruction"); | ||
| 465 | } | ||
| 466 | |||
| 467 | void EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 468 | throw NotImplementedException("GLASM instruction"); | ||
| 469 | } | ||
| 470 | |||
| 471 | void EmitGlobalAtomicInc32(EmitContext&) { | ||
| 472 | throw NotImplementedException("GLASM instruction"); | ||
| 473 | } | ||
| 474 | |||
| 475 | void EmitGlobalAtomicDec32(EmitContext&) { | ||
| 476 | throw NotImplementedException("GLASM instruction"); | ||
| 477 | } | ||
| 478 | |||
| 479 | void EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 480 | throw NotImplementedException("GLASM instruction"); | ||
| 481 | } | ||
| 482 | |||
| 483 | void EmitGlobalAtomicOr32(EmitContext&) { | ||
| 484 | throw NotImplementedException("GLASM instruction"); | ||
| 485 | } | ||
| 486 | |||
| 487 | void EmitGlobalAtomicXor32(EmitContext&) { | ||
| 488 | throw NotImplementedException("GLASM instruction"); | ||
| 489 | } | ||
| 490 | |||
| 491 | void EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 492 | throw NotImplementedException("GLASM instruction"); | ||
| 493 | } | ||
| 494 | |||
| 495 | void EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 496 | throw NotImplementedException("GLASM instruction"); | ||
| 497 | } | ||
| 498 | |||
| 499 | void EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 500 | throw NotImplementedException("GLASM instruction"); | ||
| 501 | } | ||
| 502 | |||
| 503 | void EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 504 | throw NotImplementedException("GLASM instruction"); | ||
| 505 | } | ||
| 506 | |||
| 507 | void EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 508 | throw NotImplementedException("GLASM instruction"); | ||
| 509 | } | ||
| 510 | |||
| 511 | void EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 512 | throw NotImplementedException("GLASM instruction"); | ||
| 513 | } | ||
| 514 | |||
| 515 | void EmitGlobalAtomicInc64(EmitContext&) { | ||
| 516 | throw NotImplementedException("GLASM instruction"); | ||
| 517 | } | ||
| 518 | |||
| 519 | void EmitGlobalAtomicDec64(EmitContext&) { | ||
| 520 | throw NotImplementedException("GLASM instruction"); | ||
| 521 | } | ||
| 522 | |||
| 523 | void EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 524 | throw NotImplementedException("GLASM instruction"); | ||
| 525 | } | ||
| 526 | |||
| 527 | void EmitGlobalAtomicOr64(EmitContext&) { | ||
| 528 | throw NotImplementedException("GLASM instruction"); | ||
| 529 | } | ||
| 530 | |||
| 531 | void EmitGlobalAtomicXor64(EmitContext&) { | ||
| 532 | throw NotImplementedException("GLASM instruction"); | ||
| 533 | } | ||
| 534 | |||
| 535 | void EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 536 | throw NotImplementedException("GLASM instruction"); | ||
| 537 | } | ||
| 538 | |||
| 539 | void EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 540 | throw NotImplementedException("GLASM instruction"); | ||
| 541 | } | ||
| 542 | |||
| 543 | void EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 544 | throw NotImplementedException("GLASM instruction"); | ||
| 545 | } | ||
| 546 | |||
| 547 | void EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 548 | throw NotImplementedException("GLASM instruction"); | ||
| 549 | } | ||
| 550 | |||
| 551 | void EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 552 | throw NotImplementedException("GLASM instruction"); | ||
| 553 | } | ||
| 554 | |||
| 555 | void EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 556 | throw NotImplementedException("GLASM instruction"); | ||
| 557 | } | ||
| 558 | |||
| 559 | void EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 560 | throw NotImplementedException("GLASM instruction"); | ||
| 561 | } | ||
| 562 | |||
| 563 | void EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 564 | throw NotImplementedException("GLASM instruction"); | ||
| 565 | } | ||
| 566 | |||
| 215 | } // namespace Shader::Backend::GLASM | 567 | } // namespace Shader::Backend::GLASM |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index c46452c3d..f8913bf14 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -111,7 +111,10 @@ struct RuntimeInfo { | |||
| 111 | std::optional<CompareFunction> alpha_test_func; | 111 | std::optional<CompareFunction> alpha_test_func; |
| 112 | float alpha_test_reference{}; | 112 | float alpha_test_reference{}; |
| 113 | 113 | ||
| 114 | // Static y negate value | ||
| 114 | bool y_negate{}; | 115 | bool y_negate{}; |
| 116 | // Use storage buffers instead of global pointers on GLASM | ||
| 117 | bool glasm_use_storage_buffers{}; | ||
| 115 | 118 | ||
| 116 | std::vector<TransformFeedbackVarying> xfb_varyings; | 119 | std::vector<TransformFeedbackVarying> xfb_varyings; |
| 117 | }; | 120 | }; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2d0ef1307..334ed470f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -195,7 +195,12 @@ void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buf | |||
| 195 | 195 | ||
| 196 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, | 196 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 197 | u32 offset, u32 size, bool is_written) { | 197 | u32 offset, u32 size, bool is_written) { |
| 198 | if (use_assembly_shaders) { | 198 | if (use_storage_buffers) { |
| 199 | const GLuint base_binding = graphics_base_storage_bindings[stage]; | ||
| 200 | const GLuint binding = base_binding + binding_index; | ||
| 201 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 202 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 203 | } else { | ||
| 199 | const BindlessSSBO ssbo{ | 204 | const BindlessSSBO ssbo{ |
| 200 | .address = buffer.HostGpuAddr() + offset, | 205 | .address = buffer.HostGpuAddr() + offset, |
| 201 | .length = static_cast<GLsizei>(size), | 206 | .length = static_cast<GLsizei>(size), |
| @@ -204,17 +209,19 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff | |||
| 204 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 209 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 205 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | 210 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, |
| 206 | reinterpret_cast<const GLuint*>(&ssbo)); | 211 | reinterpret_cast<const GLuint*>(&ssbo)); |
| 207 | } else { | ||
| 208 | const GLuint base_binding = graphics_base_storage_bindings[stage]; | ||
| 209 | const GLuint binding = base_binding + binding_index; | ||
| 210 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 211 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 212 | } | 212 | } |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, | 215 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 216 | u32 size, bool is_written) { | 216 | u32 size, bool is_written) { |
| 217 | if (use_assembly_shaders) { | 217 | if (use_storage_buffers) { |
| 218 | if (size != 0) { | ||
| 219 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 220 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 221 | } else { | ||
| 222 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 223 | } | ||
| 224 | } else { | ||
| 218 | const BindlessSSBO ssbo{ | 225 | const BindlessSSBO ssbo{ |
| 219 | .address = buffer.HostGpuAddr() + offset, | 226 | .address = buffer.HostGpuAddr() + offset, |
| 220 | .length = static_cast<GLsizei>(size), | 227 | .length = static_cast<GLsizei>(size), |
| @@ -223,11 +230,6 @@ void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buf | |||
| 223 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | 230 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); |
| 224 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | 231 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, |
| 225 | reinterpret_cast<const GLuint*>(&ssbo)); | 232 | reinterpret_cast<const GLuint*>(&ssbo)); |
| 226 | } else if (size == 0) { | ||
| 227 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 228 | } else { | ||
| 229 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 230 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 231 | } | 233 | } |
| 232 | } | 234 | } |
| 233 | 235 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4986c65fd..bc16abafb 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -147,6 +147,10 @@ public: | |||
| 147 | image_handles = image_handles_; | 147 | image_handles = image_handles_; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | void SetEnableStorageBuffers(bool use_storage_buffers_) { | ||
| 151 | use_storage_buffers = use_storage_buffers_; | ||
| 152 | } | ||
| 153 | |||
| 150 | private: | 154 | private: |
| 151 | static constexpr std::array PABO_LUT{ | 155 | static constexpr std::array PABO_LUT{ |
| 152 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 156 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| @@ -160,6 +164,8 @@ private: | |||
| 160 | bool use_assembly_shaders = false; | 164 | bool use_assembly_shaders = false; |
| 161 | bool has_unified_vertex_buffers = false; | 165 | bool has_unified_vertex_buffers = false; |
| 162 | 166 | ||
| 167 | bool use_storage_buffers = false; | ||
| 168 | |||
| 163 | u32 max_attributes = 0; | 169 | u32 max_attributes = 0; |
| 164 | 170 | ||
| 165 | std::array<GLuint, 5> graphics_base_uniform_bindings{}; | 171 | std::array<GLuint, 5> graphics_base_uniform_bindings{}; |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 700ebd8b8..5cf5f97a9 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -17,6 +17,15 @@ using VideoCommon::ImageId; | |||
| 17 | constexpr u32 MAX_TEXTURES = 64; | 17 | constexpr u32 MAX_TEXTURES = 64; |
| 18 | constexpr u32 MAX_IMAGES = 16; | 18 | constexpr u32 MAX_IMAGES = 16; |
| 19 | 19 | ||
| 20 | template <typename Range> | ||
| 21 | u32 AccumulateCount(const Range& range) { | ||
| 22 | u32 num{}; | ||
| 23 | for (const auto& desc : range) { | ||
| 24 | num += desc.count; | ||
| 25 | } | ||
| 26 | return num; | ||
| 27 | } | ||
| 28 | |||
| 20 | size_t ComputePipelineKey::Hash() const noexcept { | 29 | size_t ComputePipelineKey::Hash() const noexcept { |
| 21 | return static_cast<size_t>( | 30 | return static_cast<size_t>( |
| 22 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); | 31 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); |
| @@ -26,31 +35,31 @@ bool ComputePipelineKey::operator==(const ComputePipelineKey& rhs) const noexcep | |||
| 26 | return std::memcmp(this, &rhs, sizeof *this) == 0; | 35 | return std::memcmp(this, &rhs, sizeof *this) == 0; |
| 27 | } | 36 | } |
| 28 | 37 | ||
| 29 | ComputePipeline::ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, | 38 | ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cache_, |
| 30 | Tegra::MemoryManager& gpu_memory_, | 39 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, |
| 31 | Tegra::Engines::KeplerCompute& kepler_compute_, | 40 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 32 | ProgramManager& program_manager_, const Shader::Info& info_, | 41 | ProgramManager& program_manager_, const Shader::Info& info_, |
| 33 | OGLProgram source_program_, OGLAssemblyProgram assembly_program_) | 42 | OGLProgram source_program_, OGLAssemblyProgram assembly_program_) |
| 34 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, | 43 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, |
| 35 | kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, | 44 | kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_}, |
| 36 | source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { | 45 | source_program{std::move(source_program_)}, assembly_program{std::move(assembly_program_)} { |
| 37 | for (const auto& desc : info.texture_buffer_descriptors) { | 46 | |
| 38 | num_texture_buffers += desc.count; | 47 | num_texture_buffers = AccumulateCount(info.texture_buffer_descriptors); |
| 39 | } | 48 | num_image_buffers = AccumulateCount(info.image_buffer_descriptors); |
| 40 | for (const auto& desc : info.image_buffer_descriptors) { | 49 | |
| 41 | num_image_buffers += desc.count; | 50 | const u32 num_textures{num_texture_buffers + AccumulateCount(info.texture_descriptors)}; |
| 42 | } | ||
| 43 | u32 num_textures = num_texture_buffers; | ||
| 44 | for (const auto& desc : info.texture_descriptors) { | ||
| 45 | num_textures += desc.count; | ||
| 46 | } | ||
| 47 | ASSERT(num_textures <= MAX_TEXTURES); | 51 | ASSERT(num_textures <= MAX_TEXTURES); |
| 48 | 52 | ||
| 49 | u32 num_images = num_image_buffers; | 53 | const u32 num_images{num_image_buffers + AccumulateCount(info.image_descriptors)}; |
| 50 | for (const auto& desc : info.image_descriptors) { | ||
| 51 | num_images += desc.count; | ||
| 52 | } | ||
| 53 | ASSERT(num_images <= MAX_IMAGES); | 54 | ASSERT(num_images <= MAX_IMAGES); |
| 55 | |||
| 56 | const bool is_glasm{assembly_program.handle != 0}; | ||
| 57 | const u32 num_storage_buffers{AccumulateCount(info.storage_buffers_descriptors)}; | ||
| 58 | use_storage_buffers = | ||
| 59 | !is_glasm || num_storage_buffers < device.GetMaxGLASMStorageBufferBlocks(); | ||
| 60 | writes_global_memory = !use_storage_buffers && | ||
| 61 | std::ranges::any_of(info.storage_buffers_descriptors, | ||
| 62 | [](const auto& desc) { return desc.is_written; }); | ||
| 54 | } | 63 | } |
| 55 | 64 | ||
| 56 | void ComputePipeline::Configure() { | 65 | void ComputePipeline::Configure() { |
| @@ -150,6 +159,7 @@ void ComputePipeline::Configure() { | |||
| 150 | 159 | ||
| 151 | buffer_cache.UpdateComputeBuffers(); | 160 | buffer_cache.UpdateComputeBuffers(); |
| 152 | 161 | ||
| 162 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); | ||
| 153 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); | 163 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); |
| 154 | buffer_cache.BindHostComputeBuffers(); | 164 | buffer_cache.BindHostComputeBuffers(); |
| 155 | 165 | ||
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index e3b94e2f3..dd6b62ef2 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h | |||
| @@ -28,6 +28,7 @@ struct Info; | |||
| 28 | 28 | ||
| 29 | namespace OpenGL { | 29 | namespace OpenGL { |
| 30 | 30 | ||
| 31 | class Device; | ||
| 31 | class ProgramManager; | 32 | class ProgramManager; |
| 32 | 33 | ||
| 33 | struct ComputePipelineKey { | 34 | struct ComputePipelineKey { |
| @@ -49,14 +50,18 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineKey>); | |||
| 49 | 50 | ||
| 50 | class ComputePipeline { | 51 | class ComputePipeline { |
| 51 | public: | 52 | public: |
| 52 | explicit ComputePipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, | 53 | explicit ComputePipeline(const Device& device, TextureCache& texture_cache_, |
| 53 | Tegra::MemoryManager& gpu_memory_, | 54 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, |
| 54 | Tegra::Engines::KeplerCompute& kepler_compute_, | 55 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 55 | ProgramManager& program_manager_, const Shader::Info& info_, | 56 | ProgramManager& program_manager_, const Shader::Info& info_, |
| 56 | OGLProgram source_program_, OGLAssemblyProgram assembly_program_); | 57 | OGLProgram source_program_, OGLAssemblyProgram assembly_program_); |
| 57 | 58 | ||
| 58 | void Configure(); | 59 | void Configure(); |
| 59 | 60 | ||
| 61 | [[nodiscard]] bool WritesGlobalMemory() const noexcept { | ||
| 62 | return writes_global_memory; | ||
| 63 | } | ||
| 64 | |||
| 60 | private: | 65 | private: |
| 61 | TextureCache& texture_cache; | 66 | TextureCache& texture_cache; |
| 62 | BufferCache& buffer_cache; | 67 | BufferCache& buffer_cache; |
| @@ -70,6 +75,9 @@ private: | |||
| 70 | 75 | ||
| 71 | u32 num_texture_buffers{}; | 76 | u32 num_texture_buffers{}; |
| 72 | u32 num_image_buffers{}; | 77 | u32 num_image_buffers{}; |
| 78 | |||
| 79 | bool use_storage_buffers{}; | ||
| 80 | bool writes_global_memory{}; | ||
| 73 | }; | 81 | }; |
| 74 | 82 | ||
| 75 | } // namespace OpenGL | 83 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 18bbc4c1f..01da2bb57 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -135,13 +135,13 @@ Device::Device() { | |||
| 135 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 135 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 136 | disable_fast_buffer_sub_data = true; | 136 | disable_fast_buffer_sub_data = true; |
| 137 | } | 137 | } |
| 138 | |||
| 139 | max_uniform_buffers = BuildMaxUniformBuffers(); | 138 | max_uniform_buffers = BuildMaxUniformBuffers(); |
| 140 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 139 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 141 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 140 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 142 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 141 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 143 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 142 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 144 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); | 143 | max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE); |
| 144 | max_glasm_storage_buffer_blocks = GetInteger<u32>(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS); | ||
| 145 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && | 145 | has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && |
| 146 | GLAD_GL_NV_shader_thread_shuffle; | 146 | GLAD_GL_NV_shader_thread_shuffle; |
| 147 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | 147 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
| @@ -236,22 +236,6 @@ std::string Device::GetVendorName() const { | |||
| 236 | return vendor_name; | 236 | return vendor_name; |
| 237 | } | 237 | } |
| 238 | 238 | ||
| 239 | Device::Device(std::nullptr_t) { | ||
| 240 | max_uniform_buffers.fill(std::numeric_limits<u32>::max()); | ||
| 241 | uniform_buffer_alignment = 4; | ||
| 242 | shader_storage_alignment = 4; | ||
| 243 | max_vertex_attributes = 16; | ||
| 244 | max_varyings = 15; | ||
| 245 | max_compute_shared_memory_size = 0x10000; | ||
| 246 | has_warp_intrinsics = true; | ||
| 247 | has_shader_ballot = true; | ||
| 248 | has_vertex_viewport_layer = true; | ||
| 249 | has_image_load_formatted = true; | ||
| 250 | has_texture_shadow_lod = true; | ||
| 251 | has_variable_aoffi = true; | ||
| 252 | has_depth_buffer_float = true; | ||
| 253 | } | ||
| 254 | |||
| 255 | bool Device::TestVariableAoffi() { | 239 | bool Device::TestVariableAoffi() { |
| 256 | return TestProgram(R"(#version 430 core | 240 | return TestProgram(R"(#version 430 core |
| 257 | // This is a unit test, please ignore me on apitrace bug reports. | 241 | // This is a unit test, please ignore me on apitrace bug reports. |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 152a3acd3..d67f5693c 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -13,7 +13,6 @@ namespace OpenGL { | |||
| 13 | class Device { | 13 | class Device { |
| 14 | public: | 14 | public: |
| 15 | explicit Device(); | 15 | explicit Device(); |
| 16 | explicit Device(std::nullptr_t); | ||
| 17 | 16 | ||
| 18 | [[nodiscard]] std::string GetVendorName() const; | 17 | [[nodiscard]] std::string GetVendorName() const; |
| 19 | 18 | ||
| @@ -41,6 +40,10 @@ public: | |||
| 41 | return max_compute_shared_memory_size; | 40 | return max_compute_shared_memory_size; |
| 42 | } | 41 | } |
| 43 | 42 | ||
| 43 | u32 GetMaxGLASMStorageBufferBlocks() const { | ||
| 44 | return max_glasm_storage_buffer_blocks; | ||
| 45 | } | ||
| 46 | |||
| 44 | bool HasWarpIntrinsics() const { | 47 | bool HasWarpIntrinsics() const { |
| 45 | return has_warp_intrinsics; | 48 | return has_warp_intrinsics; |
| 46 | } | 49 | } |
| @@ -124,6 +127,7 @@ private: | |||
| 124 | u32 max_vertex_attributes{}; | 127 | u32 max_vertex_attributes{}; |
| 125 | u32 max_varyings{}; | 128 | u32 max_varyings{}; |
| 126 | u32 max_compute_shared_memory_size{}; | 129 | u32 max_compute_shared_memory_size{}; |
| 130 | u32 max_glasm_storage_buffer_blocks{}; | ||
| 127 | bool has_warp_intrinsics{}; | 131 | bool has_warp_intrinsics{}; |
| 128 | bool has_shader_ballot{}; | 132 | bool has_shader_ballot{}; |
| 129 | bool has_vertex_viewport_layer{}; | 133 | bool has_vertex_viewport_layer{}; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 32df35202..19d85c482 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -25,7 +25,7 @@ constexpr u32 MAX_TEXTURES = 64; | |||
| 25 | constexpr u32 MAX_IMAGES = 8; | 25 | constexpr u32 MAX_IMAGES = 8; |
| 26 | 26 | ||
| 27 | template <typename Range> | 27 | template <typename Range> |
| 28 | u32 AccumulateCount(Range&& range) { | 28 | u32 AccumulateCount(const Range& range) { |
| 29 | u32 num{}; | 29 | u32 num{}; |
| 30 | for (const auto& desc : range) { | 30 | for (const auto& desc : range) { |
| 31 | num += desc.count; | 31 | num += desc.count; |
| @@ -70,8 +70,8 @@ bool GraphicsPipelineKey::operator==(const GraphicsPipelineKey& rhs) const noexc | |||
| 70 | return std::memcmp(this, &rhs, Size()) == 0; | 70 | return std::memcmp(this, &rhs, Size()) == 0; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, | 73 | GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_cache_, |
| 74 | Tegra::MemoryManager& gpu_memory_, | 74 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, |
| 75 | Tegra::Engines::Maxwell3D& maxwell3d_, | 75 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 76 | ProgramManager& program_manager_, StateTracker& state_tracker_, | 76 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 77 | OGLProgram program_, | 77 | OGLProgram program_, |
| @@ -90,6 +90,7 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu | |||
| 90 | } | 90 | } |
| 91 | u32 num_textures{}; | 91 | u32 num_textures{}; |
| 92 | u32 num_images{}; | 92 | u32 num_images{}; |
| 93 | u32 num_storage_buffers{}; | ||
| 93 | for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { | 94 | for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { |
| 94 | const auto& info{stage_infos[stage]}; | 95 | const auto& info{stage_infos[stage]}; |
| 95 | if (stage < 4) { | 96 | if (stage < 4) { |
| @@ -109,11 +110,20 @@ GraphicsPipeline::GraphicsPipeline(TextureCache& texture_cache_, BufferCache& bu | |||
| 109 | 110 | ||
| 110 | num_textures += AccumulateCount(info.texture_descriptors); | 111 | num_textures += AccumulateCount(info.texture_descriptors); |
| 111 | num_images += AccumulateCount(info.image_descriptors); | 112 | num_images += AccumulateCount(info.image_descriptors); |
| 113 | num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); | ||
| 114 | |||
| 115 | writes_global_memory |= std::ranges::any_of( | ||
| 116 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | ||
| 112 | } | 117 | } |
| 113 | ASSERT(num_textures <= MAX_TEXTURES); | 118 | ASSERT(num_textures <= MAX_TEXTURES); |
| 114 | ASSERT(num_images <= MAX_IMAGES); | 119 | ASSERT(num_images <= MAX_IMAGES); |
| 115 | 120 | ||
| 116 | if (assembly_programs[0].handle != 0 && xfb_state) { | 121 | const bool assembly_shaders{assembly_programs[0].handle != 0}; |
| 122 | use_storage_buffers = | ||
| 123 | !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 124 | writes_global_memory &= !use_storage_buffers; | ||
| 125 | |||
| 126 | if (assembly_shaders && xfb_state) { | ||
| 117 | GenerateTransformFeedbackState(*xfb_state); | 127 | GenerateTransformFeedbackState(*xfb_state); |
| 118 | } | 128 | } |
| 119 | } | 129 | } |
| @@ -137,6 +147,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { | |||
| 137 | 147 | ||
| 138 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); | 148 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); |
| 139 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); | 149 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); |
| 150 | buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); | ||
| 140 | 151 | ||
| 141 | const auto& regs{maxwell3d.regs}; | 152 | const auto& regs{maxwell3d.regs}; |
| 142 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | 153 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 62f700cf5..c1113e180 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | 20 | ||
| 21 | namespace OpenGL { | 21 | namespace OpenGL { |
| 22 | 22 | ||
| 23 | class Device; | ||
| 23 | class ProgramManager; | 24 | class ProgramManager; |
| 24 | 25 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 26 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -60,8 +61,8 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineKey>); | |||
| 60 | 61 | ||
| 61 | class GraphicsPipeline { | 62 | class GraphicsPipeline { |
| 62 | public: | 63 | public: |
| 63 | explicit GraphicsPipeline(TextureCache& texture_cache_, BufferCache& buffer_cache_, | 64 | explicit GraphicsPipeline(const Device& device, TextureCache& texture_cache_, |
| 64 | Tegra::MemoryManager& gpu_memory_, | 65 | BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, |
| 65 | Tegra::Engines::Maxwell3D& maxwell3d_, | 66 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 66 | ProgramManager& program_manager_, StateTracker& state_tracker_, | 67 | ProgramManager& program_manager_, StateTracker& state_tracker_, |
| 67 | OGLProgram program_, | 68 | OGLProgram program_, |
| @@ -77,6 +78,10 @@ public: | |||
| 77 | } | 78 | } |
| 78 | } | 79 | } |
| 79 | 80 | ||
| 81 | [[nodiscard]] bool WritesGlobalMemory() const noexcept { | ||
| 82 | return writes_global_memory; | ||
| 83 | } | ||
| 84 | |||
| 80 | private: | 85 | private: |
| 81 | void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); | 86 | void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); |
| 82 | 87 | ||
| @@ -99,6 +104,9 @@ private: | |||
| 99 | std::array<u32, 5> num_texture_buffers{}; | 104 | std::array<u32, 5> num_texture_buffers{}; |
| 100 | std::array<u32, 5> num_image_buffers{}; | 105 | std::array<u32, 5> num_image_buffers{}; |
| 101 | 106 | ||
| 107 | bool use_storage_buffers{}; | ||
| 108 | bool writes_global_memory{}; | ||
| 109 | |||
| 102 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; | 110 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; |
| 103 | GLsizei num_xfb_attribs{}; | 111 | GLsizei num_xfb_attribs{}; |
| 104 | GLsizei num_xfb_strides{}; | 112 | GLsizei num_xfb_strides{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index eec01e8c2..5d4e80364 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -268,19 +268,21 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 268 | EndTransformFeedback(); | 268 | EndTransformFeedback(); |
| 269 | 269 | ||
| 270 | ++num_queued_commands; | 270 | ++num_queued_commands; |
| 271 | has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||
| 271 | 272 | ||
| 272 | gpu.TickWork(); | 273 | gpu.TickWork(); |
| 273 | } | 274 | } |
| 274 | 275 | ||
| 275 | void RasterizerOpenGL::DispatchCompute() { | 276 | void RasterizerOpenGL::DispatchCompute() { |
| 276 | ComputePipeline* const program{shader_cache.CurrentComputePipeline()}; | 277 | ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()}; |
| 277 | if (!program) { | 278 | if (!pipeline) { |
| 278 | return; | 279 | return; |
| 279 | } | 280 | } |
| 280 | program->Configure(); | 281 | pipeline->Configure(); |
| 281 | const auto& qmd{kepler_compute.launch_description}; | 282 | const auto& qmd{kepler_compute.launch_description}; |
| 282 | glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); | 283 | glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); |
| 283 | ++num_queued_commands; | 284 | ++num_queued_commands; |
| 285 | has_written_global_memory |= pipeline->WritesGlobalMemory(); | ||
| 284 | } | 286 | } |
| 285 | 287 | ||
| 286 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 288 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -449,9 +451,8 @@ void RasterizerOpenGL::FlushCommands() { | |||
| 449 | 451 | ||
| 450 | // Make sure memory stored from the previous GL command stream is visible | 452 | // Make sure memory stored from the previous GL command stream is visible |
| 451 | // This is only needed on assembly shaders where we write to GPU memory with raw pointers | 453 | // This is only needed on assembly shaders where we write to GPU memory with raw pointers |
| 452 | // TODO: Call this only when NV_shader_buffer_load or NV_shader_buffer_store have been used | 454 | if (has_written_global_memory) { |
| 453 | // and prefer using NV_shader_storage_buffer_object when possible | 455 | has_written_global_memory = false; |
| 454 | if (Settings::values.use_assembly_shaders.GetValue()) { | ||
| 455 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | 456 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); |
| 456 | } | 457 | } |
| 457 | glFlush(); | 458 | glFlush(); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index afd43b2ee..d0397b745 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -225,7 +225,8 @@ private: | |||
| 225 | std::array<GLuint, MAX_IMAGES> image_handles{}; | 225 | std::array<GLuint, MAX_IMAGES> image_handles{}; |
| 226 | 226 | ||
| 227 | /// Number of commands queued to the OpenGL driver. Reset on flush. | 227 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 228 | std::size_t num_queued_commands = 0; | 228 | size_t num_queued_commands = 0; |
| 229 | bool has_written_global_memory = false; | ||
| 229 | 230 | ||
| 230 | u32 last_clip_distance_mask = 0; | 231 | u32 last_clip_distance_mask = 0; |
| 231 | }; | 232 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3aa5ac31d..287f497b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -157,7 +157,8 @@ GLenum AssemblyStage(size_t stage_index) { | |||
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, | 159 | Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, |
| 160 | const Shader::IR::Program& program) { | 160 | const Shader::IR::Program& program, |
| 161 | bool glasm_use_storage_buffers) { | ||
| 161 | Shader::RuntimeInfo info; | 162 | Shader::RuntimeInfo info; |
| 162 | switch (program.stage) { | 163 | switch (program.stage) { |
| 163 | case Shader::Stage::TessellationEval: | 164 | case Shader::Stage::TessellationEval: |
| @@ -220,6 +221,7 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, | |||
| 220 | info.input_topology = Shader::InputTopology::TrianglesAdjacency; | 221 | info.input_topology = Shader::InputTopology::TrianglesAdjacency; |
| 221 | break; | 222 | break; |
| 222 | } | 223 | } |
| 224 | info.glasm_use_storage_buffers = glasm_use_storage_buffers; | ||
| 223 | return info; | 225 | return info; |
| 224 | } | 226 | } |
| 225 | 227 | ||
| @@ -435,7 +437,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 435 | ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs, | 437 | ShaderPools& pools, const GraphicsPipelineKey& key, std::span<Shader::Environment* const> envs, |
| 436 | bool build_in_parallel) { | 438 | bool build_in_parallel) { |
| 437 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | 439 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); |
| 438 | size_t env_index{0}; | 440 | size_t env_index{}; |
| 441 | u32 total_storage_buffers{}; | ||
| 439 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | 442 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 440 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 443 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 441 | if (key.unique_hashes[index] == 0) { | 444 | if (key.unique_hashes[index] == 0) { |
| @@ -447,7 +450,14 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 447 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | 450 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; |
| 448 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); | 451 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); |
| 449 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | 452 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); |
| 453 | |||
| 454 | for (const auto& desc : programs[index].info.storage_buffers_descriptors) { | ||
| 455 | total_storage_buffers += desc.count; | ||
| 456 | } | ||
| 450 | } | 457 | } |
| 458 | const u32 glasm_storage_buffer_limit{device.GetMaxGLASMStorageBufferBlocks()}; | ||
| 459 | const bool glasm_use_storage_buffers{total_storage_buffers <= glasm_storage_buffer_limit}; | ||
| 460 | |||
| 451 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | 461 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; |
| 452 | 462 | ||
| 453 | OGLProgram source_program; | 463 | OGLProgram source_program; |
| @@ -466,7 +476,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 466 | const size_t stage_index{index - 1}; | 476 | const size_t stage_index{index - 1}; |
| 467 | infos[stage_index] = &program.info; | 477 | infos[stage_index] = &program.info; |
| 468 | 478 | ||
| 469 | const Shader::RuntimeInfo runtime_info{MakeRuntimeInfo(key, program)}; | 479 | const auto runtime_info{MakeRuntimeInfo(key, program, glasm_use_storage_buffers)}; |
| 470 | if (device.UseAssemblyShaders()) { | 480 | if (device.UseAssemblyShaders()) { |
| 471 | const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; | 481 | const std::string code{EmitGLASM(profile, runtime_info, program, binding)}; |
| 472 | assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); | 482 | assembly_programs[stage_index] = CompileProgram(code, AssemblyStage(stage_index)); |
| @@ -479,7 +489,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 479 | LinkProgram(source_program.handle); | 489 | LinkProgram(source_program.handle); |
| 480 | } | 490 | } |
| 481 | return std::make_unique<GraphicsPipeline>( | 491 | return std::make_unique<GraphicsPipeline>( |
| 482 | texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, | 492 | device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, |
| 483 | std::move(source_program), std::move(assembly_programs), infos, | 493 | std::move(source_program), std::move(assembly_programs), infos, |
| 484 | key.xfb_enabled != 0 ? &key.xfb_state : nullptr); | 494 | key.xfb_enabled != 0 ? &key.xfb_state : nullptr); |
| 485 | } | 495 | } |
| @@ -508,10 +518,18 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& | |||
| 508 | 518 | ||
| 509 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 519 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 510 | Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | 520 | Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; |
| 521 | |||
| 522 | u32 num_storage_buffers{}; | ||
| 523 | for (const auto& desc : program.info.storage_buffers_descriptors) { | ||
| 524 | num_storage_buffers += desc.count; | ||
| 525 | } | ||
| 526 | Shader::RuntimeInfo info; | ||
| 527 | info.glasm_use_storage_buffers = num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 528 | |||
| 511 | OGLAssemblyProgram asm_program; | 529 | OGLAssemblyProgram asm_program; |
| 512 | OGLProgram source_program; | 530 | OGLProgram source_program; |
| 513 | if (device.UseAssemblyShaders()) { | 531 | if (device.UseAssemblyShaders()) { |
| 514 | const std::string code{EmitGLASM(profile, program)}; | 532 | const std::string code{EmitGLASM(profile, info, program)}; |
| 515 | asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); | 533 | asm_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV); |
| 516 | } else { | 534 | } else { |
| 517 | const std::vector<u32> code{EmitSPIRV(profile, program)}; | 535 | const std::vector<u32> code{EmitSPIRV(profile, program)}; |
| @@ -519,7 +537,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& | |||
| 519 | AddShader(GL_COMPUTE_SHADER, source_program.handle, code); | 537 | AddShader(GL_COMPUTE_SHADER, source_program.handle, code); |
| 520 | LinkProgram(source_program.handle); | 538 | LinkProgram(source_program.handle); |
| 521 | } | 539 | } |
| 522 | return std::make_unique<ComputePipeline>(texture_cache, buffer_cache, gpu_memory, | 540 | return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory, |
| 523 | kepler_compute, program_manager, program.info, | 541 | kepler_compute, program_manager, program.info, |
| 524 | std::move(source_program), std::move(asm_program)); | 542 | std::move(source_program), std::move(asm_program)); |
| 525 | } | 543 | } |