summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ameerj2021-12-27 23:59:32 -0500
committerGravatar ameerj2021-12-29 19:03:50 -0500
commitb84d429c2ec59e54a89d9d4e34b0df9f22172e8f (patch)
treee275b74a6fb2d16219697cbad38925e2e660e058
parentemit_glsl_integer: Use negation work around (diff)
downloadyuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.tar.gz
yuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.tar.xz
yuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.zip
glsl_context_get_set: Add alternative cbuf type for broken drivers
some drivers have a bug bitwise converting floating point cbuf values to uint variables. This adds a workaround for these drivers to make all cbufs uint and convert to floating point as needed.
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp35
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp7
-rw-r--r--src/shader_recompiler/profile.h2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
6 files changed, 35 insertions, 24 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
index 5ef46d634..0c1fbc7b1 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
@@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const
102 102
103void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 103void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
104 const IR::Value& offset) { 104 const IR::Value& offset) {
105 GetCbuf8(ctx, inst, binding, offset, "ftou"); 105 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
106 GetCbuf8(ctx, inst, binding, offset, cast);
106} 107}
107 108
108void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 109void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
109 const IR::Value& offset) { 110 const IR::Value& offset) {
110 GetCbuf8(ctx, inst, binding, offset, "ftoi"); 111 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
112 GetCbuf8(ctx, inst, binding, offset, cast);
111} 113}
112 114
113void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 115void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
114 const IR::Value& offset) { 116 const IR::Value& offset) {
115 GetCbuf16(ctx, inst, binding, offset, "ftou"); 117 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
118 GetCbuf16(ctx, inst, binding, offset, cast);
116} 119}
117 120
118void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 121void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
119 const IR::Value& offset) { 122 const IR::Value& offset) {
120 GetCbuf16(ctx, inst, binding, offset, "ftoi"); 123 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"};
124 GetCbuf16(ctx, inst, binding, offset, cast);
121} 125}
122 126
123void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 127void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
124 const IR::Value& offset) { 128 const IR::Value& offset) {
125 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; 129 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
126 GetCbuf(ctx, ret, binding, offset, 32, "ftou"); 130 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
131 GetCbuf(ctx, ret, binding, offset, 32, cast);
127} 132}
128 133
129void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 134void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
130 const IR::Value& offset) { 135 const IR::Value& offset) {
131 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; 136 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)};
132 GetCbuf(ctx, ret, binding, offset, 32); 137 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""};
138 GetCbuf(ctx, ret, binding, offset, 32, cast);
133} 139}
134 140
135void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, 141void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
136 const IR::Value& offset) { 142 const IR::Value& offset) {
137 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; 143 const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())};
144 const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"};
138 if (offset.IsImmediate()) { 145 if (offset.IsImmediate()) {
139 static constexpr u32 cbuf_size{0x10000}; 146 static constexpr u32 cbuf_size{0x10000};
140 const u32 u32_offset{offset.U32()}; 147 const u32 u32_offset{offset.U32()};
@@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding
145 return; 152 return;
146 } 153 }
147 if (u32_offset % 2 == 0) { 154 if (u32_offset % 2 == 0) {
148 ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, 155 ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16,
149 OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); 156 OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4));
150 } else { 157 } else {
151 ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, 158 ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf,
152 OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, 159 u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf,
153 OffsetSwizzle(u32_offset + 4)); 160 (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4));
154 } 161 }
155 return; 162 return;
156 } 163 }
157 const auto offset_var{ctx.var_alloc.Consume(offset)}; 164 const auto offset_var{ctx.var_alloc.Consume(offset)};
158 if (!ctx.profile.has_gl_component_indexing_bug) { 165 if (!ctx.profile.has_gl_component_indexing_bug) {
159 ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", 166 ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst,
160 inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); 167 cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var);
161 return; 168 return;
162 } 169 }
163 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; 170 const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)};
164 const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; 171 const auto cbuf_offset{fmt::format("{}>>2", offset_var)};
165 for (u32 swizzle = 0; swizzle < 4; ++swizzle) { 172 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
166 ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, 173 ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset,
167 swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, 174 swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var,
168 "xyzw"[(swizzle + 1) % 4]); 175 "xyzw"[(swizzle + 1) % 4]);
169 } 176 }
170} 177}
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index bc9d2a904..bb7f1a0fd 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) {
428 return; 428 return;
429 } 429 }
430 for (const auto& desc : info.constant_buffer_descriptors) { 430 for (const auto& desc : info.constant_buffer_descriptors) {
431 header += fmt::format( 431 const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"};
432 "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", 432 header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};",
433 bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); 433 bindings.uniform_buffer, stage_name, desc.index, cbuf_type,
434 stage_name, desc.index, 4 * 1024);
434 bindings.uniform_buffer += desc.count; 435 bindings.uniform_buffer += desc.count;
435 } 436 }
436} 437}
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0c3b3b17..9deb3f4bb 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -65,6 +65,8 @@ struct Profile {
65 bool has_gl_component_indexing_bug{}; 65 bool has_gl_component_indexing_bug{};
66 /// The precise type qualifier is broken in the fragment stage of some drivers 66 /// The precise type qualifier is broken in the fragment stage of some drivers
67 bool has_gl_precise_bug{}; 67 bool has_gl_precise_bug{};
68 /// Some drivers do not properly support floatBitsToUint when used on cbufs
69 bool has_gl_cbuf_ftou_bug{};
68 /// Ignores SPIR-V ordered vs unordered using GLSL semantics 70 /// Ignores SPIR-V ordered vs unordered using GLSL semantics
69 bool ignore_nan_fp_comparisons{}; 71 bool ignore_nan_fp_comparisons{};
70 72
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 0764ea6e0..32736126f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -182,17 +182,12 @@ Device::Device() {
182 shader_backend = Settings::ShaderBackend::GLSL; 182 shader_backend = Settings::ShaderBackend::GLSL;
183 } 183 }
184 184
185 if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia && 185 if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) {
186 !Settings::values.renderer_debug) {
187 const std::string_view driver_version = version.substr(13); 186 const std::string_view driver_version = version.substr(13);
188 const int version_major = 187 const int version_major =
189 std::atoi(driver_version.substr(0, driver_version.find(".")).data()); 188 std::atoi(driver_version.substr(0, driver_version.find(".")).data());
190
191 if (version_major >= 495) { 189 if (version_major >= 495) {
192 LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems " 190 has_cbuf_ftou_bug = true;
193 "with yuzu. Forcing GLASM as a mitigation.");
194 shader_backend = Settings::ShaderBackend::GLASM;
195 use_assembly_shaders = true;
196 } 191 }
197 } 192 }
198 193
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de9e41659..fe53ef991 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -152,6 +152,10 @@ public:
152 return need_fastmath_off; 152 return need_fastmath_off;
153 } 153 }
154 154
155 bool HasCbufFtouBug() const {
156 return has_cbuf_ftou_bug;
157 }
158
155 Settings::ShaderBackend GetShaderBackend() const { 159 Settings::ShaderBackend GetShaderBackend() const {
156 return shader_backend; 160 return shader_backend;
157 } 161 }
@@ -200,6 +204,7 @@ private:
200 bool has_sparse_texture_2{}; 204 bool has_sparse_texture_2{};
201 bool warp_size_potentially_larger_than_guest{}; 205 bool warp_size_potentially_larger_than_guest{};
202 bool need_fastmath_off{}; 206 bool need_fastmath_off{};
207 bool has_cbuf_ftou_bug{};
203 208
204 std::string vendor_name; 209 std::string vendor_name;
205}; 210};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 29c6e1a5f..1efcc3562 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
214 .has_broken_fp16_float_controls = false, 214 .has_broken_fp16_float_controls = false,
215 .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), 215 .has_gl_component_indexing_bug = device.HasComponentIndexingBug(),
216 .has_gl_precise_bug = device.HasPreciseBug(), 216 .has_gl_precise_bug = device.HasPreciseBug(),
217 .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(),
217 .ignore_nan_fp_comparisons = true, 218 .ignore_nan_fp_comparisons = true,
218 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), 219 .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
219 }, 220 },