diff options
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 23 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/spirv/spirv_emit_context.h | 36 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 35 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 18 |
6 files changed, 97 insertions, 25 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 2868fc57d..1d77426e0 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, | |||
| 111 | } else if (element_size > 1) { | 111 | } else if (element_size > 1) { |
| 112 | const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | 112 | const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; |
| 113 | const Id shift{ctx.Const(log2_element_size)}; | 113 | const Id shift{ctx.Const(log2_element_size)}; |
| 114 | buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); | 114 | buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift); |
| 115 | } else { | 115 | } else { |
| 116 | buffer_offset = ctx.Def(offset); | 116 | buffer_offset = ctx.Def(offset); |
| 117 | } | 117 | } |
| 118 | if (!binding.IsImmediate()) { | 118 | if (!binding.IsImmediate()) { |
| 119 | return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); | 119 | return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); |
| 120 | } | 120 | } |
| 121 | |||
| 121 | const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | 122 | const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; |
| 122 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; | 123 | const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; |
| 123 | return ctx.OpLoad(result_type, access_chain); | 124 | const Id val = ctx.OpLoad(result_type, access_chain); |
| 125 | |||
| 126 | if (offset.IsImmediate() || !ctx.profile.has_broken_robust) { | ||
| 127 | return val; | ||
| 128 | } | ||
| 129 | |||
| 130 | const auto is_float = UniformDefinitions::IsFloat(member_ptr); | ||
| 131 | const auto num_elements = UniformDefinitions::NumElements(member_ptr); | ||
| 132 | const std::array zero_vec{ | ||
| 133 | is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||
| 134 | is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||
| 135 | is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||
| 136 | is_float ? ctx.Const(0.0f) : ctx.Const(0u), | ||
| 137 | }; | ||
| 138 | const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu)); | ||
| 139 | const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements)); | ||
| 140 | return ctx.OpSelect(result_type, cond, val, zero); | ||
| 124 | } | 141 | } |
| 125 | 142 | ||
| 126 | Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | 143 | Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { |
| @@ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde | |||
| 138 | const u32 element{(offset.U32() / 4) % 4 + index_offset}; | 155 | const u32 element{(offset.U32() / 4) % 4 + index_offset}; |
| 139 | return ctx.OpCompositeExtract(ctx.U32[1], vector, element); | 156 | return ctx.OpCompositeExtract(ctx.U32[1], vector, element); |
| 140 | } | 157 | } |
| 141 | const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; | 158 | const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; |
| 142 | Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; | 159 | Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; |
| 143 | if (index_offset > 0) { | 160 | if (index_offset > 0) { |
| 144 | element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); | 161 | element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 7c49fd504..1aa79863d 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h | |||
| @@ -64,6 +64,42 @@ struct UniformDefinitions { | |||
| 64 | Id F32{}; | 64 | Id F32{}; |
| 65 | Id U32x2{}; | 65 | Id U32x2{}; |
| 66 | Id U32x4{}; | 66 | Id U32x4{}; |
| 67 | |||
| 68 | constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) { | ||
| 69 | if (member_ptr == &UniformDefinitions::U8) { | ||
| 70 | return 1; | ||
| 71 | } | ||
| 72 | if (member_ptr == &UniformDefinitions::S8) { | ||
| 73 | return 1; | ||
| 74 | } | ||
| 75 | if (member_ptr == &UniformDefinitions::U16) { | ||
| 76 | return 1; | ||
| 77 | } | ||
| 78 | if (member_ptr == &UniformDefinitions::S16) { | ||
| 79 | return 1; | ||
| 80 | } | ||
| 81 | if (member_ptr == &UniformDefinitions::U32) { | ||
| 82 | return 1; | ||
| 83 | } | ||
| 84 | if (member_ptr == &UniformDefinitions::F32) { | ||
| 85 | return 1; | ||
| 86 | } | ||
| 87 | if (member_ptr == &UniformDefinitions::U32x2) { | ||
| 88 | return 2; | ||
| 89 | } | ||
| 90 | if (member_ptr == &UniformDefinitions::U32x4) { | ||
| 91 | return 4; | ||
| 92 | } | ||
| 93 | ASSERT(false); | ||
| 94 | return 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) { | ||
| 98 | if (member_ptr == &UniformDefinitions::F32) { | ||
| 99 | return true; | ||
| 100 | } | ||
| 101 | return false; | ||
| 102 | } | ||
| 67 | }; | 103 | }; |
| 68 | 104 | ||
| 69 | struct StorageTypeDefinition { | 105 | struct StorageTypeDefinition { |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 9ca97f6a4..38d820db2 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -9,7 +9,6 @@ namespace Shader { | |||
| 9 | 9 | ||
| 10 | struct Profile { | 10 | struct Profile { |
| 11 | u32 supported_spirv{0x00010000}; | 11 | u32 supported_spirv{0x00010000}; |
| 12 | |||
| 13 | bool unified_descriptor_binding{}; | 12 | bool unified_descriptor_binding{}; |
| 14 | bool support_descriptor_aliasing{}; | 13 | bool support_descriptor_aliasing{}; |
| 15 | bool support_int8{}; | 14 | bool support_int8{}; |
| @@ -82,6 +81,9 @@ struct Profile { | |||
| 82 | bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; | 81 | bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; |
| 83 | 82 | ||
| 84 | u32 gl_max_compute_smem_size{}; | 83 | u32 gl_max_compute_smem_size{}; |
| 84 | |||
| 85 | /// Maxwell and earlier nVidia architectures have broken robust support | ||
| 86 | bool has_broken_robust{}; | ||
| 85 | }; | 87 | }; |
| 86 | 88 | ||
| 87 | } // namespace Shader | 89 | } // namespace Shader |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a1ec1a100..804b95989 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 356 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, | 356 | .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, |
| 357 | .ignore_nan_fp_comparisons = false, | 357 | .ignore_nan_fp_comparisons = false, |
| 358 | .has_broken_spirv_subgroup_mask_vector_extract_dynamic = | 358 | .has_broken_spirv_subgroup_mask_vector_extract_dynamic = |
| 359 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; | 359 | driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, |
| 360 | .has_broken_robust = | ||
| 361 | device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell, | ||
| 362 | }; | ||
| 363 | |||
| 360 | host_info = Shader::HostTranslateInfo{ | 364 | host_info = Shader::HostTranslateInfo{ |
| 361 | .support_float64 = device.IsFloat64Supported(), | 365 | .support_float64 = device.IsFloat64Supported(), |
| 362 | .support_float16 = device.IsFloat16Supported(), | 366 | .support_float16 = device.IsFloat16Supported(), |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 876cec2e8..e518756d2 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{ | |||
| 83 | 83 | ||
| 84 | } // namespace Alternatives | 84 | } // namespace Alternatives |
| 85 | 85 | ||
| 86 | enum class NvidiaArchitecture { | ||
| 87 | KeplerOrOlder, | ||
| 88 | Maxwell, | ||
| 89 | Pascal, | ||
| 90 | Volta, | ||
| 91 | Turing, | ||
| 92 | AmpereOrNewer, | ||
| 93 | }; | ||
| 94 | |||
| 95 | template <typename T> | 86 | template <typename T> |
| 96 | void SetNext(void**& next, T& data) { | 87 | void SetNext(void**& next, T& data) { |
| 97 | *next = &data; | 88 | *next = &data; |
| @@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | |||
| 326 | if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { | 317 | if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { |
| 327 | // Only Ampere and newer support this feature | 318 | // Only Ampere and newer support this feature |
| 328 | // TODO: Find a way to differentiate Ampere and Ada | 319 | // TODO: Find a way to differentiate Ampere and Ada |
| 329 | return NvidiaArchitecture::AmpereOrNewer; | 320 | return NvidiaArchitecture::Arch_AmpereOrNewer; |
| 330 | } | 321 | } |
| 331 | return NvidiaArchitecture::Turing; | 322 | return NvidiaArchitecture::Arch_Turing; |
| 332 | } | 323 | } |
| 333 | 324 | ||
| 334 | if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { | 325 | if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { |
| @@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | |||
| 340 | physical_properties.pNext = &advanced_blending_props; | 331 | physical_properties.pNext = &advanced_blending_props; |
| 341 | physical.GetProperties2(physical_properties); | 332 | physical.GetProperties2(physical_properties); |
| 342 | if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { | 333 | if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { |
| 343 | return NvidiaArchitecture::Maxwell; | 334 | return NvidiaArchitecture::Arch_Maxwell; |
| 344 | } | 335 | } |
| 345 | 336 | ||
| 346 | if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { | 337 | if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { |
| @@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | |||
| 350 | physical_properties.pNext = &conservative_raster_props; | 341 | physical_properties.pNext = &conservative_raster_props; |
| 351 | physical.GetProperties2(physical_properties); | 342 | physical.GetProperties2(physical_properties); |
| 352 | if (conservative_raster_props.degenerateLinesRasterized) { | 343 | if (conservative_raster_props.degenerateLinesRasterized) { |
| 353 | return NvidiaArchitecture::Volta; | 344 | return NvidiaArchitecture::Arch_Volta; |
| 354 | } | 345 | } |
| 355 | return NvidiaArchitecture::Pascal; | 346 | return NvidiaArchitecture::Arch_Pascal; |
| 356 | } | 347 | } |
| 357 | } | 348 | } |
| 358 | 349 | ||
| 359 | return NvidiaArchitecture::KeplerOrOlder; | 350 | return NvidiaArchitecture::Arch_KeplerOrOlder; |
| 360 | } | 351 | } |
| 361 | 352 | ||
| 362 | std::vector<const char*> ExtensionListForVulkan( | 353 | std::vector<const char*> ExtensionListForVulkan( |
| @@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 436 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | 427 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); |
| 437 | } | 428 | } |
| 438 | 429 | ||
| 430 | if (is_nvidia) { | ||
| 431 | nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions); | ||
| 432 | } | ||
| 433 | |||
| 439 | SetupFamilies(surface); | 434 | SetupFamilies(surface); |
| 440 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 435 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 441 | 436 | ||
| @@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 532 | 527 | ||
| 533 | if (is_nvidia) { | 528 | if (is_nvidia) { |
| 534 | const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; | 529 | const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; |
| 535 | const auto arch = GetNvidiaArchitecture(physical, supported_extensions); | 530 | const auto arch = GetNvidiaArch(); |
| 536 | if (arch >= NvidiaArchitecture::AmpereOrNewer) { | 531 | if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) { |
| 537 | LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); | 532 | LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); |
| 538 | features.shader_float16_int8.shaderFloat16 = false; | 533 | features.shader_float16_int8.shaderFloat16 = false; |
| 539 | } else if (arch <= NvidiaArchitecture::Volta) { | 534 | } else if (arch <= NvidiaArchitecture::Arch_Volta) { |
| 540 | if (nv_major_version < 527) { | 535 | if (nv_major_version < 527) { |
| 541 | LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); | 536 | LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); |
| 542 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | 537 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |
| @@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 686 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | 681 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |
| 687 | } | 682 | } |
| 688 | } else if (extensions.push_descriptor && is_nvidia) { | 683 | } else if (extensions.push_descriptor && is_nvidia) { |
| 689 | const auto arch = GetNvidiaArchitecture(physical, supported_extensions); | 684 | const auto arch = GetNvidiaArch(); |
| 690 | if (arch <= NvidiaArchitecture::Pascal) { | 685 | if (arch <= NvidiaArchitecture::Arch_Pascal) { |
| 691 | LOG_WARNING(Render_Vulkan, | 686 | LOG_WARNING(Render_Vulkan, |
| 692 | "Pascal and older architectures have broken VK_KHR_push_descriptor"); | 687 | "Pascal and older architectures have broken VK_KHR_push_descriptor"); |
| 693 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); | 688 | RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 282a2925d..b213ed7dd 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer }; | |||
| 177 | /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). | 177 | /// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). |
| 178 | const u32 GuestWarpSize = 32; | 178 | const u32 GuestWarpSize = 32; |
| 179 | 179 | ||
| 180 | enum class NvidiaArchitecture { | ||
| 181 | Arch_KeplerOrOlder, | ||
| 182 | Arch_Maxwell, | ||
| 183 | Arch_Pascal, | ||
| 184 | Arch_Volta, | ||
| 185 | Arch_Turing, | ||
| 186 | Arch_AmpereOrNewer, | ||
| 187 | }; | ||
| 188 | |||
| 180 | /// Handles data specific to a physical device. | 189 | /// Handles data specific to a physical device. |
| 181 | class Device { | 190 | class Device { |
| 182 | public: | 191 | public: |
| @@ -670,6 +679,14 @@ public: | |||
| 670 | return false; | 679 | return false; |
| 671 | } | 680 | } |
| 672 | 681 | ||
| 682 | bool IsNvidia() const noexcept { | ||
| 683 | return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY; | ||
| 684 | } | ||
| 685 | |||
| 686 | NvidiaArchitecture GetNvidiaArch() const noexcept { | ||
| 687 | return nvidia_arch; | ||
| 688 | } | ||
| 689 | |||
| 673 | private: | 690 | private: |
| 674 | /// Checks if the physical device is suitable and configures the object state | 691 | /// Checks if the physical device is suitable and configures the object state |
| 675 | /// with all necessary info about its properties. | 692 | /// with all necessary info about its properties. |
| @@ -788,6 +805,7 @@ private: | |||
| 788 | bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. | 805 | bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. |
| 789 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. | 806 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. |
| 790 | u32 sets_per_pool{}; ///< Sets per Description Pool | 807 | u32 sets_per_pool{}; ///< Sets per Description Pool |
| 808 | NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer}; | ||
| 791 | 809 | ||
| 792 | // Telemetry parameters | 810 | // Telemetry parameters |
| 793 | std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. | 811 | std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. |