diff options
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_context.cpp | 12 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | 43 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 |
5 files changed, 43 insertions, 20 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 5456d4e5b..c6325e55f 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp | |||
| @@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||
| 122 | 122 | ||
| 123 | void EmitContext::SetupExtensions(std::string&) { | 123 | void EmitContext::SetupExtensions(std::string&) { |
| 124 | header += "#extension GL_ARB_separate_shader_objects : enable\n"; | 124 | header += "#extension GL_ARB_separate_shader_objects : enable\n"; |
| 125 | header += "#extension GL_ARB_sparse_texture2 : enable\n"; | 125 | if (stage != Stage::Compute) { |
| 126 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | 126 | // TODO: track this usage |
| 127 | // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; | 127 | header += "#extension GL_ARB_sparse_texture2 : enable\n"; |
| 128 | header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||
| 129 | } | ||
| 128 | if (info.uses_int64) { | 130 | if (info.uses_int64) { |
| 129 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | 131 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; |
| 130 | } | 132 | } |
| @@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) { | |||
| 149 | info.uses_subgroup_shuffles || info.uses_fswzadd) { | 151 | info.uses_subgroup_shuffles || info.uses_fswzadd) { |
| 150 | header += "#extension GL_ARB_shader_ballot : enable\n"; | 152 | header += "#extension GL_ARB_shader_ballot : enable\n"; |
| 151 | header += "#extension GL_ARB_shader_group_vote : enable\n"; | 153 | header += "#extension GL_ARB_shader_group_vote : enable\n"; |
| 154 | header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; | ||
| 155 | if (!info.uses_int64) { | ||
| 156 | header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||
| 157 | } | ||
| 152 | } | 158 | } |
| 153 | } | 159 | } |
| 154 | 160 | ||
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index e462c977c..8a018acb5 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp | |||
| @@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in | |||
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 44 | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 45 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 45 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 46 | // TODO: | 46 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 47 | // if (ctx.profile.warp_size_potentially_larger_than_guest) { | 47 | } else { |
| 48 | // } | 48 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; |
| 49 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||
| 50 | ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||
| 51 | } | ||
| 49 | } | 52 | } |
| 50 | 53 | ||
| 51 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 54 | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 52 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | 55 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 53 | // TODO: | 56 | ctx.AddU1("{}=anyInvocationARB({});", inst, pred); |
| 54 | // if (ctx.profile.warp_size_potentially_larger_than_guest) { | 57 | } else { |
| 55 | // } | 58 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; |
| 59 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||
| 60 | ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||
| 61 | } | ||
| 56 | } | 62 | } |
| 57 | 63 | ||
| 58 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 64 | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 59 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | 65 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 60 | // TODO: | 66 | ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |
| 61 | // if (ctx.profile.warp_size_potentially_larger_than_guest) { | 67 | } else { |
| 62 | // } | 68 | const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; |
| 69 | const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||
| 70 | const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||
| 71 | ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||
| 72 | } | ||
| 63 | } | 73 | } |
| 64 | 74 | ||
| 65 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | 75 | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { |
| 66 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | 76 | if (!ctx.profile.warp_size_potentially_larger_than_guest) { |
| 67 | // TODO: | 77 | ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); |
| 68 | // if (ctx.profile.warp_size_potentially_larger_than_guest) { | 78 | } else { |
| 69 | // } | 79 | ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); |
| 80 | } | ||
| 70 | } | 81 | } |
| 71 | 82 | ||
| 72 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | 83 | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 071133781..20ea42cff 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -160,6 +160,7 @@ Device::Device() { | |||
| 160 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | 160 | has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |
| 161 | has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; | 161 | has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; |
| 162 | has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; | 162 | has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; |
| 163 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | ||
| 163 | 164 | ||
| 164 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 165 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
| 165 | // uniform buffers as "push constants" | 166 | // uniform buffers as "push constants" |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9b9402c29..ff0ff2b08 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -128,6 +128,10 @@ public: | |||
| 128 | return has_amd_shader_half_float; | 128 | return has_amd_shader_half_float; |
| 129 | } | 129 | } |
| 130 | 130 | ||
| 131 | bool IsWarpSizePotentiallyLargerThanGuest() const { | ||
| 132 | return warp_size_potentially_larger_than_guest; | ||
| 133 | } | ||
| 134 | |||
| 131 | private: | 135 | private: |
| 132 | static bool TestVariableAoffi(); | 136 | static bool TestVariableAoffi(); |
| 133 | static bool TestPreciseBug(); | 137 | static bool TestPreciseBug(); |
| @@ -161,6 +165,7 @@ private: | |||
| 161 | bool has_depth_buffer_float{}; | 165 | bool has_depth_buffer_float{}; |
| 162 | bool has_nv_gpu_shader_5{}; | 166 | bool has_nv_gpu_shader_5{}; |
| 163 | bool has_amd_shader_half_float{}; | 167 | bool has_amd_shader_half_float{}; |
| 168 | bool warp_size_potentially_larger_than_guest{}; | ||
| 164 | }; | 169 | }; |
| 165 | 170 | ||
| 166 | } // namespace OpenGL | 171 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8a052851b..cd11ff653 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 220 | .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), | 220 | .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), |
| 221 | .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), | 221 | .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), |
| 222 | 222 | ||
| 223 | .warp_size_potentially_larger_than_guest = true, | 223 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), |
| 224 | 224 | ||
| 225 | .lower_left_origin_mode = true, | 225 | .lower_left_origin_mode = true, |
| 226 | .need_declared_frag_colors = true, | 226 | .need_declared_frag_colors = true, |