diff options
Diffstat (limited to 'src/shader_recompiler/backend/glsl/glsl_emit_context.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/glsl_emit_context.cpp | 718 |
1 files changed, 718 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp new file mode 100644 index 000000000..97bd59302 --- /dev/null +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp | |||
| @@ -0,0 +1,718 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/bindings.h" | ||
| 6 | #include "shader_recompiler/backend/glsl/emit_context.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 8 | #include "shader_recompiler/profile.h" | ||
| 9 | #include "shader_recompiler/runtime_info.h" | ||
| 10 | |||
| 11 | namespace Shader::Backend::GLSL { | ||
| 12 | namespace { | ||
| 13 | u32 CbufIndex(size_t offset) { | ||
| 14 | return (offset / 4) % 4; | ||
| 15 | } | ||
| 16 | |||
| 17 | char Swizzle(size_t offset) { | ||
| 18 | return "xyzw"[CbufIndex(offset)]; | ||
| 19 | } | ||
| 20 | |||
| 21 | std::string_view InterpDecorator(Interpolation interp) { | ||
| 22 | switch (interp) { | ||
| 23 | case Interpolation::Smooth: | ||
| 24 | return ""; | ||
| 25 | case Interpolation::Flat: | ||
| 26 | return "flat "; | ||
| 27 | case Interpolation::NoPerspective: | ||
| 28 | return "noperspective "; | ||
| 29 | } | ||
| 30 | throw InvalidArgument("Invalid interpolation {}", interp); | ||
| 31 | } | ||
| 32 | |||
| 33 | std::string_view InputArrayDecorator(Stage stage) { | ||
| 34 | switch (stage) { | ||
| 35 | case Stage::Geometry: | ||
| 36 | case Stage::TessellationControl: | ||
| 37 | case Stage::TessellationEval: | ||
| 38 | return "[]"; | ||
| 39 | default: | ||
| 40 | return ""; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | bool StoresPerVertexAttributes(Stage stage) { | ||
| 45 | switch (stage) { | ||
| 46 | case Stage::VertexA: | ||
| 47 | case Stage::VertexB: | ||
| 48 | case Stage::Geometry: | ||
| 49 | case Stage::TessellationEval: | ||
| 50 | return true; | ||
| 51 | default: | ||
| 52 | return false; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | std::string OutputDecorator(Stage stage, u32 size) { | ||
| 57 | switch (stage) { | ||
| 58 | case Stage::TessellationControl: | ||
| 59 | return fmt::format("[{}]", size); | ||
| 60 | default: | ||
| 61 | return ""; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | std::string_view SamplerType(TextureType type, bool is_depth) { | ||
| 66 | if (is_depth) { | ||
| 67 | switch (type) { | ||
| 68 | case TextureType::Color1D: | ||
| 69 | return "sampler1DShadow"; | ||
| 70 | case TextureType::ColorArray1D: | ||
| 71 | return "sampler1DArrayShadow"; | ||
| 72 | case TextureType::Color2D: | ||
| 73 | return "sampler2DShadow"; | ||
| 74 | case TextureType::ColorArray2D: | ||
| 75 | return "sampler2DArrayShadow"; | ||
| 76 | case TextureType::ColorCube: | ||
| 77 | return "samplerCubeShadow"; | ||
| 78 | case TextureType::ColorArrayCube: | ||
| 79 | return "samplerCubeArrayShadow"; | ||
| 80 | default: | ||
| 81 | throw NotImplementedException("Texture type: {}", type); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | switch (type) { | ||
| 85 | case TextureType::Color1D: | ||
| 86 | return "sampler1D"; | ||
| 87 | case TextureType::ColorArray1D: | ||
| 88 | return "sampler1DArray"; | ||
| 89 | case TextureType::Color2D: | ||
| 90 | return "sampler2D"; | ||
| 91 | case TextureType::ColorArray2D: | ||
| 92 | return "sampler2DArray"; | ||
| 93 | case TextureType::Color3D: | ||
| 94 | return "sampler3D"; | ||
| 95 | case TextureType::ColorCube: | ||
| 96 | return "samplerCube"; | ||
| 97 | case TextureType::ColorArrayCube: | ||
| 98 | return "samplerCubeArray"; | ||
| 99 | case TextureType::Buffer: | ||
| 100 | return "samplerBuffer"; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Texture type: {}", type); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | std::string_view ImageType(TextureType type) { | ||
| 107 | switch (type) { | ||
| 108 | case TextureType::Color1D: | ||
| 109 | return "uimage1D"; | ||
| 110 | case TextureType::ColorArray1D: | ||
| 111 | return "uimage1DArray"; | ||
| 112 | case TextureType::Color2D: | ||
| 113 | return "uimage2D"; | ||
| 114 | case TextureType::ColorArray2D: | ||
| 115 | return "uimage2DArray"; | ||
| 116 | case TextureType::Color3D: | ||
| 117 | return "uimage3D"; | ||
| 118 | case TextureType::ColorCube: | ||
| 119 | return "uimageCube"; | ||
| 120 | case TextureType::ColorArrayCube: | ||
| 121 | return "uimageCubeArray"; | ||
| 122 | case TextureType::Buffer: | ||
| 123 | return "uimageBuffer"; | ||
| 124 | default: | ||
| 125 | throw NotImplementedException("Image type: {}", type); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | std::string_view ImageFormatString(ImageFormat format) { | ||
| 130 | switch (format) { | ||
| 131 | case ImageFormat::Typeless: | ||
| 132 | return ""; | ||
| 133 | case ImageFormat::R8_UINT: | ||
| 134 | return ",r8ui"; | ||
| 135 | case ImageFormat::R8_SINT: | ||
| 136 | return ",r8i"; | ||
| 137 | case ImageFormat::R16_UINT: | ||
| 138 | return ",r16ui"; | ||
| 139 | case ImageFormat::R16_SINT: | ||
| 140 | return ",r16i"; | ||
| 141 | case ImageFormat::R32_UINT: | ||
| 142 | return ",r32ui"; | ||
| 143 | case ImageFormat::R32G32_UINT: | ||
| 144 | return ",rg32ui"; | ||
| 145 | case ImageFormat::R32G32B32A32_UINT: | ||
| 146 | return ",rgba32ui"; | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Image format: {}", format); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | std::string_view ImageAccessQualifier(bool is_written, bool is_read) { | ||
| 153 | if (is_written && !is_read) { | ||
| 154 | return "writeonly "; | ||
| 155 | } | ||
| 156 | if (is_read && !is_written) { | ||
| 157 | return "readonly "; | ||
| 158 | } | ||
| 159 | return ""; | ||
| 160 | } | ||
| 161 | |||
| 162 | std::string_view GetTessMode(TessPrimitive primitive) { | ||
| 163 | switch (primitive) { | ||
| 164 | case TessPrimitive::Triangles: | ||
| 165 | return "triangles"; | ||
| 166 | case TessPrimitive::Quads: | ||
| 167 | return "quads"; | ||
| 168 | case TessPrimitive::Isolines: | ||
| 169 | return "isolines"; | ||
| 170 | } | ||
| 171 | throw InvalidArgument("Invalid tessellation primitive {}", primitive); | ||
| 172 | } | ||
| 173 | |||
| 174 | std::string_view GetTessSpacing(TessSpacing spacing) { | ||
| 175 | switch (spacing) { | ||
| 176 | case TessSpacing::Equal: | ||
| 177 | return "equal_spacing"; | ||
| 178 | case TessSpacing::FractionalOdd: | ||
| 179 | return "fractional_odd_spacing"; | ||
| 180 | case TessSpacing::FractionalEven: | ||
| 181 | return "fractional_even_spacing"; | ||
| 182 | } | ||
| 183 | throw InvalidArgument("Invalid tessellation spacing {}", spacing); | ||
| 184 | } | ||
| 185 | |||
| 186 | std::string_view InputPrimitive(InputTopology topology) { | ||
| 187 | switch (topology) { | ||
| 188 | case InputTopology::Points: | ||
| 189 | return "points"; | ||
| 190 | case InputTopology::Lines: | ||
| 191 | return "lines"; | ||
| 192 | case InputTopology::LinesAdjacency: | ||
| 193 | return "lines_adjacency"; | ||
| 194 | case InputTopology::Triangles: | ||
| 195 | return "triangles"; | ||
| 196 | case InputTopology::TrianglesAdjacency: | ||
| 197 | return "triangles_adjacency"; | ||
| 198 | } | ||
| 199 | throw InvalidArgument("Invalid input topology {}", topology); | ||
| 200 | } | ||
| 201 | |||
| 202 | std::string_view OutputPrimitive(OutputTopology topology) { | ||
| 203 | switch (topology) { | ||
| 204 | case OutputTopology::PointList: | ||
| 205 | return "points"; | ||
| 206 | case OutputTopology::LineStrip: | ||
| 207 | return "line_strip"; | ||
| 208 | case OutputTopology::TriangleStrip: | ||
| 209 | return "triangle_strip"; | ||
| 210 | } | ||
| 211 | throw InvalidArgument("Invalid output topology {}", topology); | ||
| 212 | } | ||
| 213 | |||
| 214 | void SetupLegacyOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 215 | if (!ctx.info.stores.Legacy()) { | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | if (ctx.info.stores.FixedFunctionTexture()) { | ||
| 219 | header += "vec4 gl_TexCoord[8];"; | ||
| 220 | } | ||
| 221 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 222 | header += "vec4 gl_FrontColor;"; | ||
| 223 | } | ||
| 224 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorFrontSpecularR)) { | ||
| 225 | header += "vec4 gl_FrontSecondaryColor;"; | ||
| 226 | } | ||
| 227 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackDiffuseR)) { | ||
| 228 | header += "vec4 gl_BackColor;"; | ||
| 229 | } | ||
| 230 | if (ctx.info.stores.AnyComponent(IR::Attribute::ColorBackSpecularR)) { | ||
| 231 | header += "vec4 gl_BackSecondaryColor;"; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | void SetupOutPerVertex(EmitContext& ctx, std::string& header) { | ||
| 236 | if (!StoresPerVertexAttributes(ctx.stage)) { | ||
| 237 | return; | ||
| 238 | } | ||
| 239 | if (ctx.uses_geometry_passthrough) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | header += "out gl_PerVertex{vec4 gl_Position;"; | ||
| 243 | if (ctx.info.stores[IR::Attribute::PointSize]) { | ||
| 244 | header += "float gl_PointSize;"; | ||
| 245 | } | ||
| 246 | if (ctx.info.stores.ClipDistances()) { | ||
| 247 | header += "float gl_ClipDistance[];"; | ||
| 248 | } | ||
| 249 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && | ||
| 250 | ctx.profile.support_viewport_index_layer_non_geometry && ctx.stage != Stage::Geometry) { | ||
| 251 | header += "int gl_ViewportIndex;"; | ||
| 252 | } | ||
| 253 | SetupLegacyOutPerVertex(ctx, header); | ||
| 254 | header += "};"; | ||
| 255 | if (ctx.info.stores[IR::Attribute::ViewportIndex] && ctx.stage == Stage::Geometry) { | ||
| 256 | header += "out int gl_ViewportIndex;"; | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | void SetupInPerVertex(EmitContext& ctx, std::string& header) { | ||
| 261 | // Currently only required for TessellationControl to adhere to | ||
| 262 | // ARB_separate_shader_objects requirements | ||
| 263 | if (ctx.stage != Stage::TessellationControl) { | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | const bool loads_position{ctx.info.loads.AnyComponent(IR::Attribute::PositionX)}; | ||
| 267 | const bool loads_point_size{ctx.info.loads[IR::Attribute::PointSize]}; | ||
| 268 | const bool loads_clip_distance{ctx.info.loads.ClipDistances()}; | ||
| 269 | const bool loads_per_vertex{loads_position || loads_point_size || loads_clip_distance}; | ||
| 270 | if (!loads_per_vertex) { | ||
| 271 | return; | ||
| 272 | } | ||
| 273 | header += "in gl_PerVertex{"; | ||
| 274 | if (loads_position) { | ||
| 275 | header += "vec4 gl_Position;"; | ||
| 276 | } | ||
| 277 | if (loads_point_size) { | ||
| 278 | header += "float gl_PointSize;"; | ||
| 279 | } | ||
| 280 | if (loads_clip_distance) { | ||
| 281 | header += "float gl_ClipDistance[];"; | ||
| 282 | } | ||
| 283 | header += "}gl_in[gl_MaxPatchVertices];"; | ||
| 284 | } | ||
| 285 | |||
| 286 | void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { | ||
| 287 | if (!ctx.info.loads.Legacy()) { | ||
| 288 | return; | ||
| 289 | } | ||
| 290 | header += "in gl_PerFragment{"; | ||
| 291 | if (ctx.info.loads.FixedFunctionTexture()) { | ||
| 292 | header += "vec4 gl_TexCoord[8];"; | ||
| 293 | } | ||
| 294 | if (ctx.info.loads.AnyComponent(IR::Attribute::ColorFrontDiffuseR)) { | ||
| 295 | header += "vec4 gl_Color;"; | ||
| 296 | } | ||
| 297 | header += "};"; | ||
| 298 | } | ||
| 299 | |||
| 300 | } // Anonymous namespace | ||
| 301 | |||
| 302 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||
| 303 | const RuntimeInfo& runtime_info_) | ||
| 304 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_}, stage{program.stage}, | ||
| 305 | uses_geometry_passthrough{program.is_geometry_passthrough && | ||
| 306 | profile.support_geometry_shader_passthrough} { | ||
| 307 | if (profile.need_fastmath_off) { | ||
| 308 | header += "#pragma optionNV(fastmath off)\n"; | ||
| 309 | } | ||
| 310 | SetupExtensions(); | ||
| 311 | switch (program.stage) { | ||
| 312 | case Stage::VertexA: | ||
| 313 | case Stage::VertexB: | ||
| 314 | stage_name = "vs"; | ||
| 315 | break; | ||
| 316 | case Stage::TessellationControl: | ||
| 317 | stage_name = "tcs"; | ||
| 318 | header += fmt::format("layout(vertices={})out;", program.invocations); | ||
| 319 | break; | ||
| 320 | case Stage::TessellationEval: | ||
| 321 | stage_name = "tes"; | ||
| 322 | header += fmt::format("layout({},{},{})in;", GetTessMode(runtime_info.tess_primitive), | ||
| 323 | GetTessSpacing(runtime_info.tess_spacing), | ||
| 324 | runtime_info.tess_clockwise ? "cw" : "ccw"); | ||
| 325 | break; | ||
| 326 | case Stage::Geometry: | ||
| 327 | stage_name = "gs"; | ||
| 328 | header += fmt::format("layout({})in;", InputPrimitive(runtime_info.input_topology)); | ||
| 329 | if (uses_geometry_passthrough) { | ||
| 330 | header += "layout(passthrough)in gl_PerVertex{vec4 gl_Position;};"; | ||
| 331 | break; | ||
| 332 | } else if (program.is_geometry_passthrough && | ||
| 333 | !profile.support_geometry_shader_passthrough) { | ||
| 334 | LOG_WARNING(Shader_GLSL, "Passthrough geometry program used but not supported"); | ||
| 335 | } | ||
| 336 | header += fmt::format( | ||
| 337 | "layout({},max_vertices={})out;in gl_PerVertex{{vec4 gl_Position;}}gl_in[];", | ||
| 338 | OutputPrimitive(program.output_topology), program.output_vertices); | ||
| 339 | break; | ||
| 340 | case Stage::Fragment: | ||
| 341 | stage_name = "fs"; | ||
| 342 | position_name = "gl_FragCoord"; | ||
| 343 | if (runtime_info.force_early_z) { | ||
| 344 | header += "layout(early_fragment_tests)in;"; | ||
| 345 | } | ||
| 346 | if (info.uses_sample_id) { | ||
| 347 | header += "in int gl_SampleID;"; | ||
| 348 | } | ||
| 349 | if (info.stores_sample_mask) { | ||
| 350 | header += "out int gl_SampleMask[];"; | ||
| 351 | } | ||
| 352 | break; | ||
| 353 | case Stage::Compute: | ||
| 354 | stage_name = "cs"; | ||
| 355 | const u32 local_x{std::max(program.workgroup_size[0], 1u)}; | ||
| 356 | const u32 local_y{std::max(program.workgroup_size[1], 1u)}; | ||
| 357 | const u32 local_z{std::max(program.workgroup_size[2], 1u)}; | ||
| 358 | header += fmt::format("layout(local_size_x={},local_size_y={},local_size_z={}) in;", | ||
| 359 | local_x, local_y, local_z); | ||
| 360 | break; | ||
| 361 | } | ||
| 362 | SetupOutPerVertex(*this, header); | ||
| 363 | SetupInPerVertex(*this, header); | ||
| 364 | SetupLegacyInPerFragment(*this, header); | ||
| 365 | |||
| 366 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 367 | if (!info.loads.Generic(index) || !runtime_info.previous_stage_stores.Generic(index)) { | ||
| 368 | continue; | ||
| 369 | } | ||
| 370 | const auto qualifier{uses_geometry_passthrough ? "passthrough" | ||
| 371 | : fmt::format("location={}", index)}; | ||
| 372 | header += fmt::format("layout({}){}in vec4 in_attr{}{};", qualifier, | ||
| 373 | InterpDecorator(info.interpolation[index]), index, | ||
| 374 | InputArrayDecorator(stage)); | ||
| 375 | } | ||
| 376 | for (size_t index = 0; index < info.uses_patches.size(); ++index) { | ||
| 377 | if (!info.uses_patches[index]) { | ||
| 378 | continue; | ||
| 379 | } | ||
| 380 | const auto qualifier{stage == Stage::TessellationControl ? "out" : "in"}; | ||
| 381 | header += fmt::format("layout(location={})patch {} vec4 patch{};", index, qualifier, index); | ||
| 382 | } | ||
| 383 | if (stage == Stage::Fragment) { | ||
| 384 | for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { | ||
| 385 | if (!info.stores_frag_color[index] && !profile.need_declared_frag_colors) { | ||
| 386 | continue; | ||
| 387 | } | ||
| 388 | header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); | ||
| 389 | } | ||
| 390 | } | ||
| 391 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 392 | if (info.stores.Generic(index)) { | ||
| 393 | DefineGenericOutput(index, program.invocations); | ||
| 394 | } | ||
| 395 | } | ||
| 396 | if (info.uses_rescaling_uniform) { | ||
| 397 | header += "layout(location=0) uniform vec4 scaling;"; | ||
| 398 | } | ||
| 399 | DefineConstantBuffers(bindings); | ||
| 400 | DefineStorageBuffers(bindings); | ||
| 401 | SetupImages(bindings); | ||
| 402 | SetupTextures(bindings); | ||
| 403 | DefineHelperFunctions(); | ||
| 404 | DefineConstants(); | ||
| 405 | } | ||
| 406 | |||
| 407 | void EmitContext::SetupExtensions() { | ||
| 408 | header += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||
| 409 | if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { | ||
| 410 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||
| 411 | } | ||
| 412 | if (info.uses_int64 && profile.support_int64) { | ||
| 413 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 414 | } | ||
| 415 | if (info.uses_int64_bit_atomics) { | ||
| 416 | header += "#extension GL_NV_shader_atomic_int64 : enable\n"; | ||
| 417 | } | ||
| 418 | if (info.uses_atomic_f32_add) { | ||
| 419 | header += "#extension GL_NV_shader_atomic_float : enable\n"; | ||
| 420 | } | ||
| 421 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||
| 422 | header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; | ||
| 423 | } | ||
| 424 | if (info.uses_fp16) { | ||
| 425 | if (profile.support_gl_nv_gpu_shader_5) { | ||
| 426 | header += "#extension GL_NV_gpu_shader5 : enable\n"; | ||
| 427 | } | ||
| 428 | if (profile.support_gl_amd_gpu_shader_half_float) { | ||
| 429 | header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; | ||
| 430 | } | ||
| 431 | } | ||
| 432 | if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||
| 433 | info.uses_subgroup_shuffles || info.uses_fswzadd) { | ||
| 434 | header += "#extension GL_ARB_shader_ballot : enable\n" | ||
| 435 | "#extension GL_ARB_shader_group_vote : enable\n"; | ||
| 436 | if (!info.uses_int64 && profile.support_int64) { | ||
| 437 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 438 | } | ||
| 439 | if (profile.support_gl_warp_intrinsics) { | ||
| 440 | header += "#extension GL_NV_shader_thread_shuffle : enable\n"; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | if ((info.stores[IR::Attribute::ViewportIndex] || info.stores[IR::Attribute::Layer]) && | ||
| 444 | profile.support_viewport_index_layer_non_geometry && stage != Stage::Geometry) { | ||
| 445 | header += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 446 | } | ||
| 447 | if (info.uses_sparse_residency && profile.support_gl_sparse_textures) { | ||
| 448 | header += "#extension GL_ARB_sparse_texture2 : enable\n"; | ||
| 449 | } | ||
| 450 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | ||
| 451 | header += "#extension GL_NV_viewport_array2 : enable\n"; | ||
| 452 | } | ||
| 453 | if (info.uses_typeless_image_reads) { | ||
| 454 | header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; | ||
| 455 | } | ||
| 456 | if (info.uses_derivatives && profile.support_gl_derivative_control) { | ||
| 457 | header += "#extension GL_ARB_derivative_control : enable\n"; | ||
| 458 | } | ||
| 459 | if (uses_geometry_passthrough) { | ||
| 460 | header += "#extension GL_NV_geometry_shader_passthrough : enable\n"; | ||
| 461 | } | ||
| 462 | } | ||
| 463 | |||
| 464 | void EmitContext::DefineConstantBuffers(Bindings& bindings) { | ||
| 465 | if (info.constant_buffer_descriptors.empty()) { | ||
| 466 | return; | ||
| 467 | } | ||
| 468 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 469 | header += fmt::format( | ||
| 470 | "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | ||
| 471 | bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); | ||
| 472 | bindings.uniform_buffer += desc.count; | ||
| 473 | } | ||
| 474 | } | ||
| 475 | |||
| 476 | void EmitContext::DefineStorageBuffers(Bindings& bindings) { | ||
| 477 | if (info.storage_buffers_descriptors.empty()) { | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | u32 index{}; | ||
| 481 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 482 | header += fmt::format("layout(std430,binding={}) buffer {}_ssbo_{}{{uint {}_ssbo{}[];}};", | ||
| 483 | bindings.storage_buffer, stage_name, bindings.storage_buffer, | ||
| 484 | stage_name, index); | ||
| 485 | bindings.storage_buffer += desc.count; | ||
| 486 | index += desc.count; | ||
| 487 | } | ||
| 488 | } | ||
| 489 | |||
| 490 | void EmitContext::DefineGenericOutput(size_t index, u32 invocations) { | ||
| 491 | static constexpr std::string_view swizzle{"xyzw"}; | ||
| 492 | const size_t base_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4}; | ||
| 493 | u32 element{0}; | ||
| 494 | while (element < 4) { | ||
| 495 | std::string definition{fmt::format("layout(location={}", index)}; | ||
| 496 | const u32 remainder{4 - element}; | ||
| 497 | const TransformFeedbackVarying* xfb_varying{}; | ||
| 498 | if (!runtime_info.xfb_varyings.empty()) { | ||
| 499 | xfb_varying = &runtime_info.xfb_varyings[base_index + element]; | ||
| 500 | xfb_varying = xfb_varying && xfb_varying->components > 0 ? xfb_varying : nullptr; | ||
| 501 | } | ||
| 502 | const u32 num_components{xfb_varying ? xfb_varying->components : remainder}; | ||
| 503 | if (element > 0) { | ||
| 504 | definition += fmt::format(",component={}", element); | ||
| 505 | } | ||
| 506 | if (xfb_varying) { | ||
| 507 | definition += | ||
| 508 | fmt::format(",xfb_buffer={},xfb_stride={},xfb_offset={}", xfb_varying->buffer, | ||
| 509 | xfb_varying->stride, xfb_varying->offset); | ||
| 510 | } | ||
| 511 | std::string name{fmt::format("out_attr{}", index)}; | ||
| 512 | if (num_components < 4 || element > 0) { | ||
| 513 | name += fmt::format("_{}", swizzle.substr(element, num_components)); | ||
| 514 | } | ||
| 515 | const auto type{num_components == 1 ? "float" : fmt::format("vec{}", num_components)}; | ||
| 516 | definition += fmt::format(")out {} {}{};", type, name, OutputDecorator(stage, invocations)); | ||
| 517 | header += definition; | ||
| 518 | |||
| 519 | const GenericElementInfo element_info{ | ||
| 520 | .name = name, | ||
| 521 | .first_element = element, | ||
| 522 | .num_components = num_components, | ||
| 523 | }; | ||
| 524 | std::fill_n(output_generics[index].begin() + element, num_components, element_info); | ||
| 525 | element += num_components; | ||
| 526 | } | ||
| 527 | } | ||
| 528 | |||
| 529 | void EmitContext::DefineHelperFunctions() { | ||
| 530 | header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n" | ||
| 531 | "#define itof intBitsToFloat\n#define utof uintBitsToFloat\n"; | ||
| 532 | if (info.uses_global_increment || info.uses_shared_increment) { | ||
| 533 | header += "uint CasIncrement(uint op_a,uint op_b){return op_a>=op_b?0u:(op_a+1u);}"; | ||
| 534 | } | ||
| 535 | if (info.uses_global_decrement || info.uses_shared_decrement) { | ||
| 536 | header += "uint CasDecrement(uint op_a,uint op_b){" | ||
| 537 | "return op_a==0||op_a>op_b?op_b:(op_a-1u);}"; | ||
| 538 | } | ||
| 539 | if (info.uses_atomic_f32_add) { | ||
| 540 | header += "uint CasFloatAdd(uint op_a,float op_b){" | ||
| 541 | "return ftou(utof(op_a)+op_b);}"; | ||
| 542 | } | ||
| 543 | if (info.uses_atomic_f32x2_add) { | ||
| 544 | header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){" | ||
| 545 | "return packHalf2x16(unpackHalf2x16(op_a)+op_b);}"; | ||
| 546 | } | ||
| 547 | if (info.uses_atomic_f32x2_min) { | ||
| 548 | header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " | ||
| 549 | "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}"; | ||
| 550 | } | ||
| 551 | if (info.uses_atomic_f32x2_max) { | ||
| 552 | header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " | ||
| 553 | "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}"; | ||
| 554 | } | ||
| 555 | if (info.uses_atomic_f16x2_add) { | ||
| 556 | header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " | ||
| 557 | "packFloat2x16(unpackFloat2x16(op_a)+op_b);}"; | ||
| 558 | } | ||
| 559 | if (info.uses_atomic_f16x2_min) { | ||
| 560 | header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " | ||
| 561 | "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}"; | ||
| 562 | } | ||
| 563 | if (info.uses_atomic_f16x2_max) { | ||
| 564 | header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " | ||
| 565 | "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}"; | ||
| 566 | } | ||
| 567 | if (info.uses_atomic_s32_min) { | ||
| 568 | header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | ||
| 569 | } | ||
| 570 | if (info.uses_atomic_s32_max) { | ||
| 571 | header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; | ||
| 572 | } | ||
| 573 | if (info.uses_global_memory && profile.support_int64) { | ||
| 574 | header += DefineGlobalMemoryFunctions(); | ||
| 575 | } | ||
| 576 | if (info.loads_indexed_attributes) { | ||
| 577 | const bool is_array{stage == Stage::Geometry}; | ||
| 578 | const auto vertex_arg{is_array ? ",uint vertex" : ""}; | ||
| 579 | std::string func{ | ||
| 580 | fmt::format("float IndexedAttrLoad(int offset{}){{int base_index=offset>>2;uint " | ||
| 581 | "masked_index=uint(base_index)&3u;switch(base_index>>2){{", | ||
| 582 | vertex_arg)}; | ||
| 583 | if (info.loads.AnyComponent(IR::Attribute::PositionX)) { | ||
| 584 | const auto position_idx{is_array ? "gl_in[vertex]." : ""}; | ||
| 585 | func += fmt::format("case {}:return {}{}[masked_index];", | ||
| 586 | static_cast<u32>(IR::Attribute::PositionX) >> 2, position_idx, | ||
| 587 | position_name); | ||
| 588 | } | ||
| 589 | const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||
| 590 | for (u32 index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 591 | if (!info.loads.Generic(index)) { | ||
| 592 | continue; | ||
| 593 | } | ||
| 594 | const auto vertex_idx{is_array ? "[vertex]" : ""}; | ||
| 595 | func += fmt::format("case {}:return in_attr{}{}[masked_index];", | ||
| 596 | base_attribute_value + index, index, vertex_idx); | ||
| 597 | } | ||
| 598 | func += "default: return 0.0;}}"; | ||
| 599 | header += func; | ||
| 600 | } | ||
| 601 | if (info.stores_indexed_attributes) { | ||
| 602 | // TODO | ||
| 603 | } | ||
| 604 | } | ||
| 605 | |||
| 606 | std::string EmitContext::DefineGlobalMemoryFunctions() { | ||
| 607 | const auto define_body{[&](std::string& func, size_t index, std::string_view return_statement) { | ||
| 608 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
| 609 | const u32 size_cbuf_offset{ssbo.cbuf_offset + 8}; | ||
| 610 | const auto ssbo_addr{fmt::format("ssbo_addr{}", index)}; | ||
| 611 | const auto cbuf{fmt::format("{}_cbuf{}", stage_name, ssbo.cbuf_index)}; | ||
| 612 | std::array<std::string, 2> addr_xy; | ||
| 613 | std::array<std::string, 2> size_xy; | ||
| 614 | for (size_t i = 0; i < addr_xy.size(); ++i) { | ||
| 615 | const auto addr_loc{ssbo.cbuf_offset + 4 * i}; | ||
| 616 | const auto size_loc{size_cbuf_offset + 4 * i}; | ||
| 617 | addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); | ||
| 618 | size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); | ||
| 619 | } | ||
| 620 | const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; | ||
| 621 | const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; | ||
| 622 | func += addr_statment; | ||
| 623 | |||
| 624 | const auto size_vec{fmt::format("uvec2({},{})", size_xy[0], size_xy[1])}; | ||
| 625 | const auto comp_lhs{fmt::format("(addr>={})", ssbo_addr)}; | ||
| 626 | const auto comp_rhs{fmt::format("(addr<({}+uint64_t({})))", ssbo_addr, size_vec)}; | ||
| 627 | const auto comparison{fmt::format("if({}&&{}){{", comp_lhs, comp_rhs)}; | ||
| 628 | func += comparison; | ||
| 629 | |||
| 630 | const auto ssbo_name{fmt::format("{}_ssbo{}", stage_name, index)}; | ||
| 631 | func += fmt::format(fmt::runtime(return_statement), ssbo_name, ssbo_addr); | ||
| 632 | }}; | ||
| 633 | std::string write_func{"void WriteGlobal32(uint64_t addr,uint data){"}; | ||
| 634 | std::string write_func_64{"void WriteGlobal64(uint64_t addr,uvec2 data){"}; | ||
| 635 | std::string write_func_128{"void WriteGlobal128(uint64_t addr,uvec4 data){"}; | ||
| 636 | std::string load_func{"uint LoadGlobal32(uint64_t addr){"}; | ||
| 637 | std::string load_func_64{"uvec2 LoadGlobal64(uint64_t addr){"}; | ||
| 638 | std::string load_func_128{"uvec4 LoadGlobal128(uint64_t addr){"}; | ||
| 639 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 640 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 641 | if (!info.nvn_buffer_used[index]) { | ||
| 642 | continue; | ||
| 643 | } | ||
| 644 | define_body(write_func, index, "{0}[uint(addr-{1})>>2]=data;return;}}"); | ||
| 645 | define_body(write_func_64, index, | ||
| 646 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;return;}}"); | ||
| 647 | define_body(write_func_128, index, | ||
| 648 | "{0}[uint(addr-{1})>>2]=data.x;{0}[uint(addr-{1}+4)>>2]=data.y;{0}[uint(" | ||
| 649 | "addr-{1}+8)>>2]=data.z;{0}[uint(addr-{1}+12)>>2]=data.w;return;}}"); | ||
| 650 | define_body(load_func, index, "return {0}[uint(addr-{1})>>2];}}"); | ||
| 651 | define_body(load_func_64, index, | ||
| 652 | "return uvec2({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2]);}}"); | ||
| 653 | define_body(load_func_128, index, | ||
| 654 | "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" | ||
| 655 | "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); | ||
| 656 | } | ||
| 657 | write_func += '}'; | ||
| 658 | write_func_64 += '}'; | ||
| 659 | write_func_128 += '}'; | ||
| 660 | load_func += "return 0u;}"; | ||
| 661 | load_func_64 += "return uvec2(0);}"; | ||
| 662 | load_func_128 += "return uvec4(0);}"; | ||
| 663 | return write_func + write_func_64 + write_func_128 + load_func + load_func_64 + load_func_128; | ||
| 664 | } | ||
| 665 | |||
| 666 | void EmitContext::SetupImages(Bindings& bindings) { | ||
| 667 | image_buffers.reserve(info.image_buffer_descriptors.size()); | ||
| 668 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 669 | image_buffers.push_back({bindings.image, desc.count}); | ||
| 670 | const auto format{ImageFormatString(desc.format)}; | ||
| 671 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 672 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 673 | header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", | ||
| 674 | bindings.image, format, qualifier, bindings.image, array_decorator); | ||
| 675 | bindings.image += desc.count; | ||
| 676 | } | ||
| 677 | images.reserve(info.image_descriptors.size()); | ||
| 678 | for (const auto& desc : info.image_descriptors) { | ||
| 679 | images.push_back({bindings.image, desc.count}); | ||
| 680 | const auto format{ImageFormatString(desc.format)}; | ||
| 681 | const auto image_type{ImageType(desc.type)}; | ||
| 682 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 683 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 684 | header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, | ||
| 685 | qualifier, image_type, bindings.image, array_decorator); | ||
| 686 | bindings.image += desc.count; | ||
| 687 | } | ||
| 688 | } | ||
| 689 | |||
| 690 | void EmitContext::SetupTextures(Bindings& bindings) { | ||
| 691 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | ||
| 692 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 693 | texture_buffers.push_back({bindings.texture, desc.count}); | ||
| 694 | const auto sampler_type{SamplerType(TextureType::Buffer, false)}; | ||
| 695 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 696 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 697 | sampler_type, bindings.texture, array_decorator); | ||
| 698 | bindings.texture += desc.count; | ||
| 699 | } | ||
| 700 | textures.reserve(info.texture_descriptors.size()); | ||
| 701 | for (const auto& desc : info.texture_descriptors) { | ||
| 702 | textures.push_back({bindings.texture, desc.count}); | ||
| 703 | const auto sampler_type{SamplerType(desc.type, desc.is_depth)}; | ||
| 704 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||
| 705 | header += fmt::format("layout(binding={}) uniform {} tex{}{};", bindings.texture, | ||
| 706 | sampler_type, bindings.texture, array_decorator); | ||
| 707 | bindings.texture += desc.count; | ||
| 708 | } | ||
| 709 | } | ||
| 710 | |||
| 711 | void EmitContext::DefineConstants() { | ||
| 712 | if (info.uses_fswzadd) { | ||
| 713 | header += "const float FSWZ_A[]=float[4](-1.f,1.f,-1.f,0.f);" | ||
| 714 | "const float FSWZ_B[]=float[4](-1.f,-1.f,1.f,-1.f);"; | ||
| 715 | } | ||
| 716 | } | ||
| 717 | |||
| 718 | } // namespace Shader::Backend::GLSL | ||