summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Kelebek1 2023-10-14 22:20:28 +0100
committer: Kelebek1 2023-10-19 19:54:31 +0100
commite02ee8e59d099692678bed09332b7d8aad1ce271 (patch)
tree8ea0de01f31cee80075441fab4c746e0615a7ff7 /src
parentMerge pull request #11810 from liamwhite/clang-17 (diff)
downloadyuzu-e02ee8e59d099692678bed09332b7d8aad1ce271.tar.gz
yuzu-e02ee8e59d099692678bed09332b7d8aad1ce271.tar.xz
yuzu-e02ee8e59d099692678bed09332b7d8aad1ce271.zip
Manually implement robustness on Maxwell and earlier
Diffstat (limited to 'src')
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp23
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.h36
-rw-r--r--src/shader_recompiler/profile.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp6
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp35
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h18
6 files changed, 97 insertions, 25 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 2868fc57d..1d77426e0 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -111,16 +111,33 @@ Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr,
111 } else if (element_size > 1) { 111 } else if (element_size > 1) {
112 const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; 112 const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))};
113 const Id shift{ctx.Const(log2_element_size)}; 113 const Id shift{ctx.Const(log2_element_size)};
114 buffer_offset = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); 114 buffer_offset = ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), shift);
115 } else { 115 } else {
116 buffer_offset = ctx.Def(offset); 116 buffer_offset = ctx.Def(offset);
117 } 117 }
118 if (!binding.IsImmediate()) { 118 if (!binding.IsImmediate()) {
119 return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset); 119 return ctx.OpFunctionCall(result_type, indirect_func, ctx.Def(binding), buffer_offset);
120 } 120 }
121
121 const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; 122 const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr};
122 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)}; 123 const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, buffer_offset)};
123 return ctx.OpLoad(result_type, access_chain); 124 const Id val = ctx.OpLoad(result_type, access_chain);
125
126 if (offset.IsImmediate() || !ctx.profile.has_broken_robust) {
127 return val;
128 }
129
130 const auto is_float = UniformDefinitions::IsFloat(member_ptr);
131 const auto num_elements = UniformDefinitions::NumElements(member_ptr);
132 const std::array zero_vec{
133 is_float ? ctx.Const(0.0f) : ctx.Const(0u),
134 is_float ? ctx.Const(0.0f) : ctx.Const(0u),
135 is_float ? ctx.Const(0.0f) : ctx.Const(0u),
136 is_float ? ctx.Const(0.0f) : ctx.Const(0u),
137 };
138 const Id cond = ctx.OpULessThanEqual(ctx.TypeBool(), buffer_offset, ctx.Const(0xFFFFu));
139 const Id zero = ctx.OpCompositeConstruct(result_type, std::span(zero_vec.data(), num_elements));
140 return ctx.OpSelect(result_type, cond, val, zero);
124} 141}
125 142
126Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { 143Id GetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
@@ -138,7 +155,7 @@ Id GetCbufElement(EmitContext& ctx, Id vector, const IR::Value& offset, u32 inde
138 const u32 element{(offset.U32() / 4) % 4 + index_offset}; 155 const u32 element{(offset.U32() / 4) % 4 + index_offset};
139 return ctx.OpCompositeExtract(ctx.U32[1], vector, element); 156 return ctx.OpCompositeExtract(ctx.U32[1], vector, element);
140 } 157 }
141 const Id shift{ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))}; 158 const Id shift{ctx.OpShiftRightLogical(ctx.U32[1], ctx.Def(offset), ctx.Const(2u))};
142 Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))}; 159 Id element{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Const(3u))};
143 if (index_offset > 0) { 160 if (index_offset > 0) {
144 element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset)); 161 element = ctx.OpIAdd(ctx.U32[1], element, ctx.Const(index_offset));
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 7c49fd504..1aa79863d 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -64,6 +64,42 @@ struct UniformDefinitions {
64 Id F32{}; 64 Id F32{};
65 Id U32x2{}; 65 Id U32x2{};
66 Id U32x4{}; 66 Id U32x4{};
67
68 constexpr static size_t NumElements(Id UniformDefinitions::*member_ptr) {
69 if (member_ptr == &UniformDefinitions::U8) {
70 return 1;
71 }
72 if (member_ptr == &UniformDefinitions::S8) {
73 return 1;
74 }
75 if (member_ptr == &UniformDefinitions::U16) {
76 return 1;
77 }
78 if (member_ptr == &UniformDefinitions::S16) {
79 return 1;
80 }
81 if (member_ptr == &UniformDefinitions::U32) {
82 return 1;
83 }
84 if (member_ptr == &UniformDefinitions::F32) {
85 return 1;
86 }
87 if (member_ptr == &UniformDefinitions::U32x2) {
88 return 2;
89 }
90 if (member_ptr == &UniformDefinitions::U32x4) {
91 return 4;
92 }
93 ASSERT(false);
94 return 1;
95 }
96
97 constexpr static bool IsFloat(Id UniformDefinitions::*member_ptr) {
98 if (member_ptr == &UniformDefinitions::F32) {
99 return true;
100 }
101 return false;
102 }
67}; 103};
68 104
69struct StorageTypeDefinition { 105struct StorageTypeDefinition {
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 9ca97f6a4..38d820db2 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -9,7 +9,6 @@ namespace Shader {
9 9
10struct Profile { 10struct Profile {
11 u32 supported_spirv{0x00010000}; 11 u32 supported_spirv{0x00010000};
12
13 bool unified_descriptor_binding{}; 12 bool unified_descriptor_binding{};
14 bool support_descriptor_aliasing{}; 13 bool support_descriptor_aliasing{};
15 bool support_int8{}; 14 bool support_int8{};
@@ -82,6 +81,9 @@ struct Profile {
82 bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{}; 81 bool has_broken_spirv_subgroup_mask_vector_extract_dynamic{};
83 82
84 u32 gl_max_compute_smem_size{}; 83 u32 gl_max_compute_smem_size{};
84
85 /// Maxwell and earlier nVidia architectures have broken robust support
86 bool has_broken_robust{};
85}; 87};
86 88
87} // namespace Shader 89} // namespace Shader
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a1ec1a100..804b95989 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -356,7 +356,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
356 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY, 356 .has_broken_fp16_float_controls = driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY,
357 .ignore_nan_fp_comparisons = false, 357 .ignore_nan_fp_comparisons = false,
358 .has_broken_spirv_subgroup_mask_vector_extract_dynamic = 358 .has_broken_spirv_subgroup_mask_vector_extract_dynamic =
359 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY}; 359 driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY,
360 .has_broken_robust =
361 device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Maxwell,
362 };
363
360 host_info = Shader::HostTranslateInfo{ 364 host_info = Shader::HostTranslateInfo{
361 .support_float64 = device.IsFloat64Supported(), 365 .support_float64 = device.IsFloat64Supported(),
362 .support_float16 = device.IsFloat16Supported(), 366 .support_float16 = device.IsFloat16Supported(),
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 876cec2e8..e518756d2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -83,15 +83,6 @@ constexpr std::array VK_FORMAT_A4B4G4R4_UNORM_PACK16{
83 83
84} // namespace Alternatives 84} // namespace Alternatives
85 85
86enum class NvidiaArchitecture {
87 KeplerOrOlder,
88 Maxwell,
89 Pascal,
90 Volta,
91 Turing,
92 AmpereOrNewer,
93};
94
95template <typename T> 86template <typename T>
96void SetNext(void**& next, T& data) { 87void SetNext(void**& next, T& data) {
97 *next = &data; 88 *next = &data;
@@ -326,9 +317,9 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
326 if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) { 317 if (shading_rate_props.primitiveFragmentShadingRateWithMultipleViewports) {
327 // Only Ampere and newer support this feature 318 // Only Ampere and newer support this feature
328 // TODO: Find a way to differentiate Ampere and Ada 319 // TODO: Find a way to differentiate Ampere and Ada
329 return NvidiaArchitecture::AmpereOrNewer; 320 return NvidiaArchitecture::Arch_AmpereOrNewer;
330 } 321 }
331 return NvidiaArchitecture::Turing; 322 return NvidiaArchitecture::Arch_Turing;
332 } 323 }
333 324
334 if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) { 325 if (exts.contains(VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME)) {
@@ -340,7 +331,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
340 physical_properties.pNext = &advanced_blending_props; 331 physical_properties.pNext = &advanced_blending_props;
341 physical.GetProperties2(physical_properties); 332 physical.GetProperties2(physical_properties);
342 if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) { 333 if (advanced_blending_props.advancedBlendMaxColorAttachments == 1) {
343 return NvidiaArchitecture::Maxwell; 334 return NvidiaArchitecture::Arch_Maxwell;
344 } 335 }
345 336
346 if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) { 337 if (exts.contains(VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME)) {
@@ -350,13 +341,13 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
350 physical_properties.pNext = &conservative_raster_props; 341 physical_properties.pNext = &conservative_raster_props;
351 physical.GetProperties2(physical_properties); 342 physical.GetProperties2(physical_properties);
352 if (conservative_raster_props.degenerateLinesRasterized) { 343 if (conservative_raster_props.degenerateLinesRasterized) {
353 return NvidiaArchitecture::Volta; 344 return NvidiaArchitecture::Arch_Volta;
354 } 345 }
355 return NvidiaArchitecture::Pascal; 346 return NvidiaArchitecture::Arch_Pascal;
356 } 347 }
357 } 348 }
358 349
359 return NvidiaArchitecture::KeplerOrOlder; 350 return NvidiaArchitecture::Arch_KeplerOrOlder;
360} 351}
361 352
362std::vector<const char*> ExtensionListForVulkan( 353std::vector<const char*> ExtensionListForVulkan(
@@ -436,6 +427,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
436 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); 427 throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
437 } 428 }
438 429
430 if (is_nvidia) {
431 nvidia_arch = GetNvidiaArchitecture(physical, supported_extensions);
432 }
433
439 SetupFamilies(surface); 434 SetupFamilies(surface);
440 const auto queue_cis = GetDeviceQueueCreateInfos(); 435 const auto queue_cis = GetDeviceQueueCreateInfos();
441 436
@@ -532,11 +527,11 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
532 527
533 if (is_nvidia) { 528 if (is_nvidia) {
534 const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff; 529 const u32 nv_major_version = (properties.properties.driverVersion >> 22) & 0x3ff;
535 const auto arch = GetNvidiaArchitecture(physical, supported_extensions); 530 const auto arch = GetNvidiaArch();
536 if (arch >= NvidiaArchitecture::AmpereOrNewer) { 531 if (arch >= NvidiaArchitecture::Arch_AmpereOrNewer) {
537 LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math"); 532 LOG_WARNING(Render_Vulkan, "Ampere and newer have broken float16 math");
538 features.shader_float16_int8.shaderFloat16 = false; 533 features.shader_float16_int8.shaderFloat16 = false;
539 } else if (arch <= NvidiaArchitecture::Volta) { 534 } else if (arch <= NvidiaArchitecture::Arch_Volta) {
540 if (nv_major_version < 527) { 535 if (nv_major_version < 527) {
541 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor"); 536 LOG_WARNING(Render_Vulkan, "Volta and older have broken VK_KHR_push_descriptor");
542 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); 537 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -686,8 +681,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
686 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); 681 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
687 } 682 }
688 } else if (extensions.push_descriptor && is_nvidia) { 683 } else if (extensions.push_descriptor && is_nvidia) {
689 const auto arch = GetNvidiaArchitecture(physical, supported_extensions); 684 const auto arch = GetNvidiaArch();
690 if (arch <= NvidiaArchitecture::Pascal) { 685 if (arch <= NvidiaArchitecture::Arch_Pascal) {
691 LOG_WARNING(Render_Vulkan, 686 LOG_WARNING(Render_Vulkan,
692 "Pascal and older architectures have broken VK_KHR_push_descriptor"); 687 "Pascal and older architectures have broken VK_KHR_push_descriptor");
693 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); 688 RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 282a2925d..b213ed7dd 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -177,6 +177,15 @@ enum class FormatType { Linear, Optimal, Buffer };
177/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup). 177/// Subgroup size of the guest emulated hardware (Nvidia has 32 threads per subgroup).
178const u32 GuestWarpSize = 32; 178const u32 GuestWarpSize = 32;
179 179
180enum class NvidiaArchitecture {
181 Arch_KeplerOrOlder,
182 Arch_Maxwell,
183 Arch_Pascal,
184 Arch_Volta,
185 Arch_Turing,
186 Arch_AmpereOrNewer,
187};
188
180/// Handles data specific to a physical device. 189/// Handles data specific to a physical device.
181class Device { 190class Device {
182public: 191public:
@@ -670,6 +679,14 @@ public:
670 return false; 679 return false;
671 } 680 }
672 681
682 bool IsNvidia() const noexcept {
683 return properties.driver.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY;
684 }
685
686 NvidiaArchitecture GetNvidiaArch() const noexcept {
687 return nvidia_arch;
688 }
689
673private: 690private:
674 /// Checks if the physical device is suitable and configures the object state 691 /// Checks if the physical device is suitable and configures the object state
675 /// with all necessary info about its properties. 692 /// with all necessary info about its properties.
@@ -788,6 +805,7 @@ private:
788 bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow. 805 bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
789 u64 device_access_memory{}; ///< Total size of device local memory in bytes. 806 u64 device_access_memory{}; ///< Total size of device local memory in bytes.
790 u32 sets_per_pool{}; ///< Sets per Description Pool 807 u32 sets_per_pool{}; ///< Sets per Description Pool
808 NvidiaArchitecture nvidia_arch{NvidiaArchitecture::Arch_AmpereOrNewer};
791 809
792 // Telemetry parameters 810 // Telemetry parameters
793 std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions. 811 std::set<std::string, std::less<>> supported_extensions; ///< Reported Vulkan extensions.