summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_context.cpp 12
-rw-r--r-- src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp 43
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 5
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 2
5 files changed, 43 insertions, 20 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp
index 5456d4e5b..c6325e55f 100644
--- a/src/shader_recompiler/backend/glsl/emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_context.cpp
@@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
122 122
123void EmitContext::SetupExtensions(std::string&) { 123void EmitContext::SetupExtensions(std::string&) {
124 header += "#extension GL_ARB_separate_shader_objects : enable\n"; 124 header += "#extension GL_ARB_separate_shader_objects : enable\n";
125 header += "#extension GL_ARB_sparse_texture2 : enable\n"; 125 if (stage != Stage::Compute) {
126 header += "#extension GL_EXT_texture_shadow_lod : enable\n"; 126 // TODO: track this usage
127 // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; 127 header += "#extension GL_ARB_sparse_texture2 : enable\n";
128 header += "#extension GL_EXT_texture_shadow_lod : enable\n";
129 }
128 if (info.uses_int64) { 130 if (info.uses_int64) {
129 header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; 131 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
130 } 132 }
@@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) {
149 info.uses_subgroup_shuffles || info.uses_fswzadd) { 151 info.uses_subgroup_shuffles || info.uses_fswzadd) {
150 header += "#extension GL_ARB_shader_ballot : enable\n"; 152 header += "#extension GL_ARB_shader_ballot : enable\n";
151 header += "#extension GL_ARB_shader_group_vote : enable\n"; 153 header += "#extension GL_ARB_shader_group_vote : enable\n";
154 header += "#extension GL_KHR_shader_subgroup_basic : enable\n";
155 if (!info.uses_int64) {
156 header += "#extension GL_ARB_gpu_shader_int64 : enable\n";
157 }
152 } 158 }
153} 159}
154 160
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
index e462c977c..8a018acb5 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp
@@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in
42} 42}
43 43
44void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 44void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
45 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); 45 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
46 // TODO: 46 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
47 // if (ctx.profile.warp_size_potentially_larger_than_guest) { 47 } else {
48 // } 48 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
49 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
50 ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask);
51 }
49} 52}
50 53
51void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 54void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
52 ctx.AddU1("{}=anyInvocationARB({});", inst, pred); 55 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
53 // TODO: 56 ctx.AddU1("{}=anyInvocationARB({});", inst, pred);
54 // if (ctx.profile.warp_size_potentially_larger_than_guest) { 57 } else {
55 // } 58 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
59 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
60 ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask);
61 }
56} 62}
57 63
58void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 64void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
59 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); 65 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
60 // TODO: 66 ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred);
61 // if (ctx.profile.warp_size_potentially_larger_than_guest) { 67 } else {
62 // } 68 const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")};
69 const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)};
70 const auto value{fmt::format("({}^{})", ballot, active_mask)};
71 ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask);
72 }
63} 73}
64 74
65void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { 75void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) {
66 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); 76 if (!ctx.profile.warp_size_potentially_larger_than_guest) {
67 // TODO: 77 ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred);
68 // if (ctx.profile.warp_size_potentially_larger_than_guest) { 78 } else {
69 // } 79 ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred);
80 }
70} 81}
71 82
72void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { 83void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) {
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 071133781..20ea42cff 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -160,6 +160,7 @@ Device::Device() {
160 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); 160 has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float");
161 has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; 161 has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5;
162 has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; 162 has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
163 warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
163 164
164 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive 165 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
165 // uniform buffers as "push constants" 166 // uniform buffers as "push constants"
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 9b9402c29..ff0ff2b08 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -128,6 +128,10 @@ public:
128 return has_amd_shader_half_float; 128 return has_amd_shader_half_float;
129 } 129 }
130 130
131 bool IsWarpSizePotentiallyLargerThanGuest() const {
132 return warp_size_potentially_larger_than_guest;
133 }
134
131private: 135private:
132 static bool TestVariableAoffi(); 136 static bool TestVariableAoffi();
133 static bool TestPreciseBug(); 137 static bool TestPreciseBug();
@@ -161,6 +165,7 @@ private:
161 bool has_depth_buffer_float{}; 165 bool has_depth_buffer_float{};
162 bool has_nv_gpu_shader_5{}; 166 bool has_nv_gpu_shader_5{};
163 bool has_amd_shader_half_float{}; 167 bool has_amd_shader_half_float{};
168 bool warp_size_potentially_larger_than_guest{};
164}; 169};
165 170
166} // namespace OpenGL 171} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 8a052851b..cd11ff653 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
220 .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), 220 .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(),
221 .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), 221 .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(),
222 222
223 .warp_size_potentially_larger_than_guest = true, 223 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(),
224 224
225 .lower_left_origin_mode = true, 225 .lower_left_origin_mode = true,
226 .need_declared_frag_colors = true, 226 .need_declared_frag_colors = true,