diff options
| author | 2021-12-27 23:59:32 -0500 | |
|---|---|---|
| committer | 2021-12-29 19:03:50 -0500 | |
| commit | b84d429c2ec59e54a89d9d4e34b0df9f22172e8f (patch) | |
| tree | e275b74a6fb2d16219697cbad38925e2e660e058 /src | |
| parent | emit_glsl_integer: Use negation work around (diff) | |
| download | yuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.tar.gz yuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.tar.xz yuzu-b84d429c2ec59e54a89d9d4e34b0df9f22172e8f.zip | |
glsl_context_get_set: Add alternative cbuf type for broken drivers
Some drivers have a bug when bitwise-converting floating-point cbuf values to uint variables. This adds a workaround for those drivers that declares all cbufs as uint and converts to floating point as needed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | 35 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/glsl/glsl_emit_context.cpp | 7 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 |
6 files changed, 35 insertions, 24 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 5ef46d634..0c1fbc7b1 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp | |||
| @@ -102,39 +102,46 @@ void GetCbuf16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const | |||
| 102 | 102 | ||
| 103 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 103 | void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 104 | const IR::Value& offset) { | 104 | const IR::Value& offset) { |
| 105 | GetCbuf8(ctx, inst, binding, offset, "ftou"); | 105 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; |
| 106 | GetCbuf8(ctx, inst, binding, offset, cast); | ||
| 106 | } | 107 | } |
| 107 | 108 | ||
| 108 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 109 | void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 109 | const IR::Value& offset) { | 110 | const IR::Value& offset) { |
| 110 | GetCbuf8(ctx, inst, binding, offset, "ftoi"); | 111 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; |
| 112 | GetCbuf8(ctx, inst, binding, offset, cast); | ||
| 111 | } | 113 | } |
| 112 | 114 | ||
| 113 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 115 | void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 114 | const IR::Value& offset) { | 116 | const IR::Value& offset) { |
| 115 | GetCbuf16(ctx, inst, binding, offset, "ftou"); | 117 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; |
| 118 | GetCbuf16(ctx, inst, binding, offset, cast); | ||
| 116 | } | 119 | } |
| 117 | 120 | ||
| 118 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 121 | void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 119 | const IR::Value& offset) { | 122 | const IR::Value& offset) { |
| 120 | GetCbuf16(ctx, inst, binding, offset, "ftoi"); | 123 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "int" : "ftoi"}; |
| 124 | GetCbuf16(ctx, inst, binding, offset, cast); | ||
| 121 | } | 125 | } |
| 122 | 126 | ||
| 123 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 127 | void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 124 | const IR::Value& offset) { | 128 | const IR::Value& offset) { |
| 125 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; | 129 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)}; |
| 126 | GetCbuf(ctx, ret, binding, offset, 32, "ftou"); | 130 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; |
| 131 | GetCbuf(ctx, ret, binding, offset, 32, cast); | ||
| 127 | } | 132 | } |
| 128 | 133 | ||
| 129 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 134 | void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 130 | const IR::Value& offset) { | 135 | const IR::Value& offset) { |
| 131 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; | 136 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::F32)}; |
| 132 | GetCbuf(ctx, ret, binding, offset, 32); | 137 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "utof" : ""}; |
| 138 | GetCbuf(ctx, ret, binding, offset, 32, cast); | ||
| 133 | } | 139 | } |
| 134 | 140 | ||
| 135 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | 141 | void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, |
| 136 | const IR::Value& offset) { | 142 | const IR::Value& offset) { |
| 137 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | 143 | const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; |
| 144 | const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; | ||
| 138 | if (offset.IsImmediate()) { | 145 | if (offset.IsImmediate()) { |
| 139 | static constexpr u32 cbuf_size{0x10000}; | 146 | static constexpr u32 cbuf_size{0x10000}; |
| 140 | const u32 u32_offset{offset.U32()}; | 147 | const u32 u32_offset{offset.U32()}; |
| @@ -145,26 +152,26 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding | |||
| 145 | return; | 152 | return; |
| 146 | } | 153 | } |
| 147 | if (u32_offset % 2 == 0) { | 154 | if (u32_offset % 2 == 0) { |
| 148 | ctx.AddU32x2("{}=ftou({}[{}].{}{});", inst, cbuf, u32_offset / 16, | 155 | ctx.AddU32x2("{}={}({}[{}].{}{});", inst, cast, cbuf, u32_offset / 16, |
| 149 | OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); | 156 | OffsetSwizzle(u32_offset), OffsetSwizzle(u32_offset + 4)); |
| 150 | } else { | 157 | } else { |
| 151 | ctx.AddU32x2("{}=uvec2(ftou({}[{}].{}),ftou({}[{}].{}));", inst, cbuf, u32_offset / 16, | 158 | ctx.AddU32x2("{}=uvec2({}({}[{}].{}),{}({}[{}].{}));", inst, cast, cbuf, |
| 152 | OffsetSwizzle(u32_offset), cbuf, (u32_offset + 4) / 16, | 159 | u32_offset / 16, OffsetSwizzle(u32_offset), cast, cbuf, |
| 153 | OffsetSwizzle(u32_offset + 4)); | 160 | (u32_offset + 4) / 16, OffsetSwizzle(u32_offset + 4)); |
| 154 | } | 161 | } |
| 155 | return; | 162 | return; |
| 156 | } | 163 | } |
| 157 | const auto offset_var{ctx.var_alloc.Consume(offset)}; | 164 | const auto offset_var{ctx.var_alloc.Consume(offset)}; |
| 158 | if (!ctx.profile.has_gl_component_indexing_bug) { | 165 | if (!ctx.profile.has_gl_component_indexing_bug) { |
| 159 | ctx.AddU32x2("{}=uvec2(ftou({}[{}>>4][({}>>2)%4]),ftou({}[({}+4)>>4][(({}+4)>>2)%4]));", | 166 | ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst, |
| 160 | inst, cbuf, offset_var, offset_var, cbuf, offset_var, offset_var); | 167 | cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var); |
| 161 | return; | 168 | return; |
| 162 | } | 169 | } |
| 163 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | 170 | const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; |
| 164 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | 171 | const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; |
| 165 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | 172 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { |
| 166 | ctx.Add("if(({}&3)=={}){}=uvec2(ftou({}[{}>>4].{}),ftou({}[({}+4)>>4].{}));", cbuf_offset, | 173 | ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset, |
| 167 | swizzle, ret, cbuf, offset_var, "xyzw"[swizzle], cbuf, offset_var, | 174 | swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var, |
| 168 | "xyzw"[(swizzle + 1) % 4]); | 175 | "xyzw"[(swizzle + 1) % 4]); |
| 169 | } | 176 | } |
| 170 | } | 177 | } |
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp index bc9d2a904..bb7f1a0fd 100644 --- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp | |||
| @@ -428,9 +428,10 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { | |||
| 428 | return; | 428 | return; |
| 429 | } | 429 | } |
| 430 | for (const auto& desc : info.constant_buffer_descriptors) { | 430 | for (const auto& desc : info.constant_buffer_descriptors) { |
| 431 | header += fmt::format( | 431 | const auto cbuf_type{profile.has_gl_cbuf_ftou_bug ? "uvec4" : "vec4"}; |
| 432 | "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | 432 | header += fmt::format("layout(std140,binding={}) uniform {}_cbuf_{}{{{} {}_cbuf{}[{}];}};", |
| 433 | bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); | 433 | bindings.uniform_buffer, stage_name, desc.index, cbuf_type, |
| 434 | stage_name, desc.index, 4 * 1024); | ||
| 434 | bindings.uniform_buffer += desc.count; | 435 | bindings.uniform_buffer += desc.count; |
| 435 | } | 436 | } |
| 436 | } | 437 | } |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f0c3b3b17..9deb3f4bb 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -65,6 +65,8 @@ struct Profile { | |||
| 65 | bool has_gl_component_indexing_bug{}; | 65 | bool has_gl_component_indexing_bug{}; |
| 66 | /// The precise type qualifier is broken in the fragment stage of some drivers | 66 | /// The precise type qualifier is broken in the fragment stage of some drivers |
| 67 | bool has_gl_precise_bug{}; | 67 | bool has_gl_precise_bug{}; |
| 68 | /// Some drivers do not properly support floatBitsToUint when used on cbufs | ||
| 69 | bool has_gl_cbuf_ftou_bug{}; | ||
| 68 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics | 70 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics |
| 69 | bool ignore_nan_fp_comparisons{}; | 71 | bool ignore_nan_fp_comparisons{}; |
| 70 | 72 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 0764ea6e0..32736126f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -182,17 +182,12 @@ Device::Device() { | |||
| 182 | shader_backend = Settings::ShaderBackend::GLSL; | 182 | shader_backend = Settings::ShaderBackend::GLSL; |
| 183 | } | 183 | } |
| 184 | 184 | ||
| 185 | if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia && | 185 | if (shader_backend == Settings::ShaderBackend::GLSL && is_nvidia) { |
| 186 | !Settings::values.renderer_debug) { | ||
| 187 | const std::string_view driver_version = version.substr(13); | 186 | const std::string_view driver_version = version.substr(13); |
| 188 | const int version_major = | 187 | const int version_major = |
| 189 | std::atoi(driver_version.substr(0, driver_version.find(".")).data()); | 188 | std::atoi(driver_version.substr(0, driver_version.find(".")).data()); |
| 190 | |||
| 191 | if (version_major >= 495) { | 189 | if (version_major >= 495) { |
| 192 | LOG_WARNING(Render_OpenGL, "NVIDIA drivers 495 and later causes significant problems " | 190 | has_cbuf_ftou_bug = true; |
| 193 | "with yuzu. Forcing GLASM as a mitigation."); | ||
| 194 | shader_backend = Settings::ShaderBackend::GLASM; | ||
| 195 | use_assembly_shaders = true; | ||
| 196 | } | 191 | } |
| 197 | } | 192 | } |
| 198 | 193 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de9e41659..fe53ef991 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -152,6 +152,10 @@ public: | |||
| 152 | return need_fastmath_off; | 152 | return need_fastmath_off; |
| 153 | } | 153 | } |
| 154 | 154 | ||
| 155 | bool HasCbufFtouBug() const { | ||
| 156 | return has_cbuf_ftou_bug; | ||
| 157 | } | ||
| 158 | |||
| 155 | Settings::ShaderBackend GetShaderBackend() const { | 159 | Settings::ShaderBackend GetShaderBackend() const { |
| 156 | return shader_backend; | 160 | return shader_backend; |
| 157 | } | 161 | } |
| @@ -200,6 +204,7 @@ private: | |||
| 200 | bool has_sparse_texture_2{}; | 204 | bool has_sparse_texture_2{}; |
| 201 | bool warp_size_potentially_larger_than_guest{}; | 205 | bool warp_size_potentially_larger_than_guest{}; |
| 202 | bool need_fastmath_off{}; | 206 | bool need_fastmath_off{}; |
| 207 | bool has_cbuf_ftou_bug{}; | ||
| 203 | 208 | ||
| 204 | std::string vendor_name; | 209 | std::string vendor_name; |
| 205 | }; | 210 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 29c6e1a5f..1efcc3562 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -214,6 +214,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 214 | .has_broken_fp16_float_controls = false, | 214 | .has_broken_fp16_float_controls = false, |
| 215 | .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), | 215 | .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), |
| 216 | .has_gl_precise_bug = device.HasPreciseBug(), | 216 | .has_gl_precise_bug = device.HasPreciseBug(), |
| 217 | .has_gl_cbuf_ftou_bug = device.HasCbufFtouBug(), | ||
| 217 | .ignore_nan_fp_comparisons = true, | 218 | .ignore_nan_fp_comparisons = true, |
| 218 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | 219 | .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), |
| 219 | }, | 220 | }, |