diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.cpp | 44 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.h | 2 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 2 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | 22 | ||||
| -rw-r--r-- | src/shader_recompiler/ir_opt/texture_pass.cpp | 11 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 2 | ||||
| -rw-r--r-- | src/shader_recompiler/shader_info.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 |
11 files changed, 86 insertions, 38 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 0e8fe017d..d224c4d84 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { | |||
| 148 | } | 148 | } |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | std::string_view ImageAccessQualifier(bool is_written, bool is_read) { | ||
| 152 | if (is_written && !is_read) { | ||
| 153 | return "writeonly "; | ||
| 154 | } | ||
| 155 | if (is_read && !is_written) { | ||
| 156 | return "readonly "; | ||
| 157 | } | ||
| 158 | return ""; | ||
| 159 | } | ||
| 160 | |||
| 151 | std::string_view GetTessMode(TessPrimitive primitive) { | 161 | std::string_view GetTessMode(TessPrimitive primitive) { |
| 152 | switch (primitive) { | 162 | switch (primitive) { |
| 153 | case TessPrimitive::Triangles: | 163 | case TessPrimitive::Triangles: |
| @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { | |||
| 262 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | 272 | EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, |
| 263 | const RuntimeInfo& runtime_info_) | 273 | const RuntimeInfo& runtime_info_) |
| 264 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { | 274 | : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { |
| 265 | header += "#pragma optionNV(fastmath off)\n"; | 275 | if (profile.need_fastmath_off) { |
| 276 | header += "#pragma optionNV(fastmath off)\n"; | ||
| 277 | } | ||
| 266 | SetupExtensions(); | 278 | SetupExtensions(); |
| 267 | stage = program.stage; | 279 | stage = program.stage; |
| 268 | switch (program.stage) { | 280 | switch (program.stage) { |
| @@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 335 | } | 347 | } |
| 336 | for (size_t index = 0; index < info.stores_generics.size(); ++index) { | 348 | for (size_t index = 0; index < info.stores_generics.size(); ++index) { |
| 337 | // TODO: Properly resolve attribute issues | 349 | // TODO: Properly resolve attribute issues |
| 338 | if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { | 350 | if (info.stores_generics[index] || StageInitializesVaryings()) { |
| 339 | DefineGenericOutput(index, program.invocations); | 351 | DefineGenericOutput(index, program.invocations); |
| 340 | } | 352 | } |
| 341 | } | 353 | } |
| @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 347 | DefineConstants(); | 359 | DefineConstants(); |
| 348 | } | 360 | } |
| 349 | 361 | ||
| 362 | bool EmitContext::StageInitializesVaryings() const noexcept { | ||
| 363 | switch (stage) { | ||
| 364 | case Stage::VertexA: | ||
| 365 | case Stage::VertexB: | ||
| 366 | case Stage::Geometry: | ||
| 367 | return true; | ||
| 368 | default: | ||
| 369 | return false; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | |||
| 350 | void EmitContext::SetupExtensions() { | 373 | void EmitContext::SetupExtensions() { |
| 351 | if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { | 374 | if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { |
| 352 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | 375 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; |
| @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { | |||
| 361 | header += "#extension GL_NV_shader_atomic_float : enable\n"; | 384 | header += "#extension GL_NV_shader_atomic_float : enable\n"; |
| 362 | } | 385 | } |
| 363 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | 386 | if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { |
| 364 | header += "#extension NV_shader_atomic_fp16_vector : enable\n"; | 387 | header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; |
| 365 | } | 388 | } |
| 366 | if (info.uses_fp16) { | 389 | if (info.uses_fp16) { |
| 367 | if (profile.support_gl_nv_gpu_shader_5) { | 390 | if (profile.support_gl_nv_gpu_shader_5) { |
| @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { | |||
| 392 | if (info.stores_viewport_mask && profile.support_viewport_mask) { | 415 | if (info.stores_viewport_mask && profile.support_viewport_mask) { |
| 393 | header += "#extension GL_NV_viewport_array2 : enable\n"; | 416 | header += "#extension GL_NV_viewport_array2 : enable\n"; |
| 394 | } | 417 | } |
| 395 | if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { | 418 | if (info.uses_typeless_image_reads) { |
| 396 | header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; | 419 | header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; |
| 397 | } | 420 | } |
| 398 | if (info.uses_derivatives && profile.support_gl_derivative_control) { | 421 | if (info.uses_derivatives && profile.support_gl_derivative_control) { |
| @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { | |||
| 593 | "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" | 616 | "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" |
| 594 | "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); | 617 | "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); |
| 595 | } | 618 | } |
| 596 | write_func += "}"; | 619 | write_func += '}'; |
| 597 | write_func_64 += "}"; | 620 | write_func_64 += '}'; |
| 598 | write_func_128 += "}"; | 621 | write_func_128 += '}'; |
| 599 | load_func += "return 0u;}"; | 622 | load_func += "return 0u;}"; |
| 600 | load_func_64 += "return uvec2(0);}"; | 623 | load_func_64 += "return uvec2(0);}"; |
| 601 | load_func_128 += "return uvec4(0);}"; | 624 | load_func_128 += "return uvec4(0);}"; |
| @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { | |||
| 607 | for (const auto& desc : info.image_buffer_descriptors) { | 630 | for (const auto& desc : info.image_buffer_descriptors) { |
| 608 | image_buffers.push_back({bindings.image, desc.count}); | 631 | image_buffers.push_back({bindings.image, desc.count}); |
| 609 | const auto format{ImageFormatString(desc.format)}; | 632 | const auto format{ImageFormatString(desc.format)}; |
| 633 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||
| 610 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | 634 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; |
| 611 | header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, | 635 | header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", |
| 612 | format, bindings.image, array_decorator); | 636 | bindings.image, format, qualifier, bindings.image, array_decorator); |
| 613 | bindings.image += desc.count; | 637 | bindings.image += desc.count; |
| 614 | } | 638 | } |
| 615 | images.reserve(info.image_descriptors.size()); | 639 | images.reserve(info.image_descriptors.size()); |
| @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { | |||
| 617 | images.push_back({bindings.image, desc.count}); | 641 | images.push_back({bindings.image, desc.count}); |
| 618 | const auto format{ImageFormatString(desc.format)}; | 642 | const auto format{ImageFormatString(desc.format)}; |
| 619 | const auto image_type{ImageType(desc.type)}; | 643 | const auto image_type{ImageType(desc.type)}; |
| 620 | const auto qualifier{desc.is_written ? "" : "readonly "}; | 644 | const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; |
| 621 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | 645 | const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; |
| 622 | header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, | 646 | header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, |
| 623 | qualifier, image_type, bindings.image, array_decorator); | 647 | qualifier, image_type, bindings.image, array_decorator); |
diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index 8fa87c02c..4a50556e1 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h | |||
| @@ -136,6 +136,8 @@ public: | |||
| 136 | code += '\n'; | 136 | code += '\n'; |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | [[nodiscard]] bool StageInitializesVaryings() const noexcept; | ||
| 140 | |||
| 139 | std::string header; | 141 | std::string header; |
| 140 | std::string code; | 142 | std::string code; |
| 141 | VarAlloc var_alloc; | 143 | VarAlloc var_alloc; |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index edeecc26e..a241d18fe 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val | |||
| 329 | ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); | 329 | ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); |
| 330 | break; | 330 | break; |
| 331 | case IR::Attribute::FogCoordinate: | 331 | case IR::Attribute::FogCoordinate: |
| 332 | ctx.Add("gl_FogFragCoord.x={};", value); | 332 | ctx.Add("gl_FogFragCoord={};", value); |
| 333 | break; | 333 | break; |
| 334 | case IR::Attribute::ClipDistance0: | 334 | case IR::Attribute::ClipDistance0: |
| 335 | case IR::Attribute::ClipDistance1: | 335 | case IR::Attribute::ClipDistance1: |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp index cfef58d79..59ca52f07 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_special.cpp | |||
| @@ -10,6 +10,17 @@ | |||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | 10 | #include "shader_recompiler/frontend/ir/value.h" |
| 11 | 11 | ||
| 12 | namespace Shader::Backend::GLSL { | 12 | namespace Shader::Backend::GLSL { |
| 13 | namespace { | ||
| 14 | void InitializeVaryings(EmitContext& ctx) { | ||
| 15 | ctx.Add("gl_Position=vec4(0,0,0,1);"); | ||
| 16 | // TODO: Properly resolve attribute issues | ||
| 17 | for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { | ||
| 18 | if (!ctx.info.stores_generics[index]) { | ||
| 19 | ctx.Add("out_attr{}=vec4(0,0,0,1);", index); | ||
| 20 | } | ||
| 21 | } | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 13 | 24 | ||
| 14 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | 25 | void EmitPhi(EmitContext& ctx, IR::Inst& phi) { |
| 15 | const size_t num_args{phi.NumArgs()}; | 26 | const size_t num_args{phi.NumArgs()}; |
| @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& | |||
| 44 | } | 55 | } |
| 45 | 56 | ||
| 46 | void EmitPrologue(EmitContext& ctx) { | 57 | void EmitPrologue(EmitContext& ctx) { |
| 47 | if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { | 58 | if (ctx.StageInitializesVaryings()) { |
| 48 | ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | 59 | InitializeVaryings(ctx); |
| 49 | // TODO: Properly resolve attribute issues | ||
| 50 | for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { | ||
| 51 | if (!ctx.info.stores_generics[index]) { | ||
| 52 | ctx.Add("out_attr{}=vec4(0,0,0,1);", index); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | } | 60 | } |
| 56 | } | 61 | } |
| 57 | 62 | ||
| @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} | |||
| 59 | 64 | ||
| 60 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | 65 | void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { |
| 61 | ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); | 66 | ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); |
| 67 | InitializeVaryings(ctx); | ||
| 62 | } | 68 | } |
| 63 | 69 | ||
| 64 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | 70 | void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { |
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp index e9098239d..737f186ab 100644 --- a/src/shader_recompiler/ir_opt/texture_pass.cpp +++ b/src/shader_recompiler/ir_opt/texture_pass.cpp | |||
| @@ -312,11 +312,14 @@ public: | |||
| 312 | } | 312 | } |
| 313 | 313 | ||
| 314 | u32 Add(const ImageBufferDescriptor& desc) { | 314 | u32 Add(const ImageBufferDescriptor& desc) { |
| 315 | return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { | 315 | const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { |
| 316 | return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && | 316 | return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && |
| 317 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | 317 | desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && |
| 318 | desc.size_shift == existing.size_shift; | 318 | desc.size_shift == existing.size_shift; |
| 319 | }); | 319 | })}; |
| 320 | image_buffer_descriptors[index].is_written |= desc.is_written; | ||
| 321 | image_buffer_descriptors[index].is_read |= desc.is_read; | ||
| 322 | return index; | ||
| 320 | } | 323 | } |
| 321 | 324 | ||
| 322 | u32 Add(const TextureDescriptor& desc) { | 325 | u32 Add(const TextureDescriptor& desc) { |
| @@ -339,6 +342,7 @@ public: | |||
| 339 | desc.size_shift == existing.size_shift; | 342 | desc.size_shift == existing.size_shift; |
| 340 | })}; | 343 | })}; |
| 341 | image_descriptors[index].is_written |= desc.is_written; | 344 | image_descriptors[index].is_written |= desc.is_written; |
| 345 | image_descriptors[index].is_read |= desc.is_read; | ||
| 342 | return index; | 346 | return index; |
| 343 | } | 347 | } |
| 344 | 348 | ||
| @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { | |||
| 430 | throw NotImplementedException("Unexpected separate sampler"); | 434 | throw NotImplementedException("Unexpected separate sampler"); |
| 431 | } | 435 | } |
| 432 | const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; | 436 | const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; |
| 437 | const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; | ||
| 433 | if (flags.type == TextureType::Buffer) { | 438 | if (flags.type == TextureType::Buffer) { |
| 434 | index = descriptors.Add(ImageBufferDescriptor{ | 439 | index = descriptors.Add(ImageBufferDescriptor{ |
| 435 | .format = flags.image_format, | 440 | .format = flags.image_format, |
| 436 | .is_written = is_written, | 441 | .is_written = is_written, |
| 442 | .is_read = is_read, | ||
| 437 | .cbuf_index = cbuf.index, | 443 | .cbuf_index = cbuf.index, |
| 438 | .cbuf_offset = cbuf.offset, | 444 | .cbuf_offset = cbuf.offset, |
| 439 | .count = cbuf.count, | 445 | .count = cbuf.count, |
| @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { | |||
| 444 | .type = flags.type, | 450 | .type = flags.type, |
| 445 | .format = flags.image_format, | 451 | .format = flags.image_format, |
| 446 | .is_written = is_written, | 452 | .is_written = is_written, |
| 453 | .is_read = is_read, | ||
| 447 | .cbuf_index = cbuf.index, | 454 | .cbuf_index = cbuf.index, |
| 448 | .cbuf_offset = cbuf.offset, | 455 | .cbuf_offset = cbuf.offset, |
| 449 | .count = cbuf.count, | 456 | .count = cbuf.count, |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index e8cfc03af..a3c412a0f 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -97,6 +97,8 @@ struct Profile { | |||
| 97 | /// Fragment outputs have to be declared even if they are not written to avoid undefined values. | 97 | /// Fragment outputs have to be declared even if they are not written to avoid undefined values. |
| 98 | /// See Ori and the Blind Forest's main menu for reference. | 98 | /// See Ori and the Blind Forest's main menu for reference. |
| 99 | bool need_declared_frag_colors{}; | 99 | bool need_declared_frag_colors{}; |
| 100 | /// Prevents fast math optimizations that may cause inaccuracies | ||
| 101 | bool need_fastmath_off{}; | ||
| 100 | 102 | ||
| 101 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | 103 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead |
| 102 | bool has_broken_spirv_clamp{}; | 104 | bool has_broken_spirv_clamp{}; |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 74d7a6a94..e9ebc16a4 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes | |||
| 75 | struct ImageBufferDescriptor { | 75 | struct ImageBufferDescriptor { |
| 76 | ImageFormat format; | 76 | ImageFormat format; |
| 77 | bool is_written; | 77 | bool is_written; |
| 78 | bool is_read; | ||
| 78 | u32 cbuf_index; | 79 | u32 cbuf_index; |
| 79 | u32 cbuf_offset; | 80 | u32 cbuf_offset; |
| 80 | u32 count; | 81 | u32 count; |
| @@ -99,6 +100,7 @@ struct ImageDescriptor { | |||
| 99 | TextureType type; | 100 | TextureType type; |
| 100 | ImageFormat format; | 101 | ImageFormat format; |
| 101 | bool is_written; | 102 | bool is_written; |
| 103 | bool is_read; | ||
| 102 | u32 cbuf_index; | 104 | u32 cbuf_index; |
| 103 | u32 cbuf_offset; | 105 | u32 cbuf_offset; |
| 104 | u32 count; | 106 | u32 count; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index bf08a6d93..5838fc02f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -162,6 +162,7 @@ Device::Device() { | |||
| 162 | has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; | 162 | has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; |
| 163 | has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; | 163 | has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; |
| 164 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | 164 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; |
| 165 | need_fastmath_off = is_nvidia; | ||
| 165 | 166 | ||
| 166 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 167 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 167 | // uniform buffers as "push constants" | 168 | // uniform buffers as "push constants" |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 0b59c9df0..0c9d6fe31 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -136,6 +136,10 @@ public: | |||
| 136 | return warp_size_potentially_larger_than_guest; | 136 | return warp_size_potentially_larger_than_guest; |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | bool NeedsFastmathOff() const { | ||
| 140 | return need_fastmath_off; | ||
| 141 | } | ||
| 142 | |||
| 139 | private: | 143 | private: |
| 140 | static bool TestVariableAoffi(); | 144 | static bool TestVariableAoffi(); |
| 141 | static bool TestPreciseBug(); | 145 | static bool TestPreciseBug(); |
| @@ -171,6 +175,7 @@ private: | |||
| 171 | bool has_amd_shader_half_float{}; | 175 | bool has_amd_shader_half_float{}; |
| 172 | bool has_sparse_texture_2{}; | 176 | bool has_sparse_texture_2{}; |
| 173 | bool warp_size_potentially_larger_than_guest{}; | 177 | bool warp_size_potentially_larger_than_guest{}; |
| 178 | bool need_fastmath_off{}; | ||
| 174 | }; | 179 | }; |
| 175 | 180 | ||
| 176 | } // namespace OpenGL | 181 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d27a3cf46..8d11fbc55 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 132 | std::ranges::transform(infos, stage_infos.begin(), | 132 | std::ranges::transform(infos, stage_infos.begin(), |
| 133 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | 133 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); |
| 134 | auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { | 134 | auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { |
| 135 | if (device.UseAssemblyShaders()) { | 135 | if (!device.UseAssemblyShaders()) { |
| 136 | for (size_t stage = 0; stage < 5; ++stage) { | 136 | program.handle = glCreateProgram(); |
| 137 | const auto code{sources[stage]}; | 137 | } |
| 138 | if (code.empty()) { | 138 | for (size_t stage = 0; stage < 5; ++stage) { |
| 139 | continue; | 139 | const auto code{sources[stage]}; |
| 140 | } | 140 | if (code.empty()) { |
| 141 | continue; | ||
| 142 | } | ||
| 143 | if (device.UseAssemblyShaders()) { | ||
| 141 | assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); | 144 | assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); |
| 142 | enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; | 145 | enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; |
| 143 | } | 146 | } else { |
| 144 | } else { | ||
| 145 | program.handle = glCreateProgram(); | ||
| 146 | for (size_t stage = 0; stage < 5; ++stage) { | ||
| 147 | const auto code{sources[stage]}; | ||
| 148 | if (code.empty()) { | ||
| 149 | continue; | ||
| 150 | } | ||
| 151 | AttachShader(Stage(stage), program.handle, code); | 147 | AttachShader(Stage(stage), program.handle, code); |
| 152 | } | 148 | } |
| 153 | LinkProgram(program.handle); | ||
| 154 | } | 149 | } |
| 155 | if (shader_notify) { | 150 | if (!device.UseAssemblyShaders()) { |
| 156 | shader_notify->MarkShaderComplete(); | 151 | LinkProgram(program.handle); |
| 157 | } | 152 | } |
| 158 | u32 num_textures{}; | 153 | u32 num_textures{}; |
| 159 | u32 num_images{}; | 154 | u32 num_images{}; |
| @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 198 | if (assembly_shaders && xfb_state) { | 193 | if (assembly_shaders && xfb_state) { |
| 199 | GenerateTransformFeedbackState(*xfb_state); | 194 | GenerateTransformFeedbackState(*xfb_state); |
| 200 | } | 195 | } |
| 196 | if (shader_notify) { | ||
| 197 | shader_notify->MarkShaderComplete(); | ||
| 198 | } | ||
| 201 | is_built.store(true, std::memory_order_relaxed); | 199 | is_built.store(true, std::memory_order_relaxed); |
| 202 | }}; | 200 | }}; |
| 203 | if (thread_worker) { | 201 | if (thread_worker) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index fedbce2f0..620666622 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 193 | 193 | ||
| 194 | .lower_left_origin_mode = true, | 194 | .lower_left_origin_mode = true, |
| 195 | .need_declared_frag_colors = true, | 195 | .need_declared_frag_colors = true, |
| 196 | .need_fastmath_off = device.NeedsFastmathOff(), | ||
| 196 | 197 | ||
| 197 | .has_broken_spirv_clamp = true, | 198 | .has_broken_spirv_clamp = true, |
| 198 | .has_broken_unsigned_image_offsets = true, | 199 | .has_broken_unsigned_image_offsets = true, |