diff options
Diffstat (limited to '')
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp | 17 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 1 |
3 files changed, 19 insertions, 1 deletion
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
index c5db19d09..77ff8c573 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
| @@ -17,7 +17,22 @@ Id GetThreadId(EmitContext& ctx) { | |||
| 17 | Id WarpExtract(EmitContext& ctx, Id value) { | 17 | Id WarpExtract(EmitContext& ctx, Id value) { |
| 18 | const Id thread_id{GetThreadId(ctx)}; | 18 | const Id thread_id{GetThreadId(ctx)}; |
| 19 | const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; | 19 | const Id local_index{ctx.OpShiftRightArithmetic(ctx.U32[1], thread_id, ctx.Const(5U))}; |
| 20 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | 20 | if (ctx.profile.has_broken_spirv_subgroup_mask_vector_extract_dynamic) { |
| 21 | const Id c0_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(0U)), | ||
| 22 | ctx.OpCompositeExtract(ctx.U32[1], value, 0U), ctx.Const(0U))}; | ||
| 23 | const Id c1_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(1U)), | ||
| 24 | ctx.OpCompositeExtract(ctx.U32[1], value, 1U), ctx.Const(0U))}; | ||
| 25 | const Id c2_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(2U)), | ||
| 26 | ctx.OpCompositeExtract(ctx.U32[1], value, 2U), ctx.Const(0U))}; | ||
| 27 | const Id c3_sel{ctx.OpSelect(ctx.U32[1], ctx.OpIEqual(ctx.U1, local_index, ctx.Const(3U)), | ||
| 28 | ctx.OpCompositeExtract(ctx.U32[1], value, 3U), ctx.Const(0U))}; | ||
| 29 | const Id c0_or_c1{ctx.OpBitwiseOr(ctx.U32[1], c0_sel, c1_sel)}; | ||
| 30 | const Id c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c2_sel, c3_sel)}; | ||
| 31 | const Id c0_or_c1_or_c2_or_c3{ctx.OpBitwiseOr(ctx.U32[1], c0_or_c1, c2_or_c3)}; | ||
| 32 | return c0_or_c1_or_c2_or_c3; | ||
| 33 | } else { | ||
| 34 | return ctx.OpVectorExtractDynamic(ctx.U32[1], value, local_index); | ||
| 35 | } | ||
| 21 | } | 36 | } |
| 22 | 37 | ||
| 23 | Id LoadMask(EmitContext& ctx, Id mask) { | 38 | Id LoadMask(EmitContext& ctx, Id mask) { |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 3bb4a7e6f..9ca97f6a4 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
| @@ -78,6 +78,8 @@ struct Profile { | |||
| 78 | bool has_gl_bool_ref_bug{}; | 78 | bool has_gl_bool_ref_bug{}; |
| 79 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics | 79 | /// Ignores SPIR-V ordered vs unordered using GLSL semantics |
| 80 | bool ignore_nan_fp_comparisons{}; | 80 | bool ignore_nan_fp_comparisons{}; |
| 81 | /// Some drivers have broken support for OpVectorExtractDynamic on subgroup mask inputs | ||
| 82 | bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; | ||
| 81 | 83 | ||
| 82 | u32 gl_max_compute_smem_size{}; | 84 | u32 gl_max_compute_smem_size{}; |
| 83 | }; | 85 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e39713761..e506a8b30 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
| @@ -351,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 351 | .has_broken_signed_operations = false, | 351 | .has_broken_signed_operations = false, |
| 352 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, | 352 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, |
| 353 | .ignore_nan_fp_comparisons = false, | 353 | .ignore_nan_fp_comparisons = false, |
| 354 | .has_broken_spirv_subgroup_mask_vector_extract_dynamic = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY | ||
| 354 | }; | 355 | }; |
| 355 | host_info = Shader::HostTranslateInfo{ | 356 | host_info = Shader::HostTranslateInfo{ |
| 356 | .support_float16 = device.IsFloat16Supported(), | 357 | .support_float16 = device.IsFloat16Supported(), |