diff options
| author | 2019-06-05 18:10:00 -0400 | |
|---|---|---|
| committer | 2019-06-05 18:10:00 -0400 | |
| commit | a20ba09bfde185d8911f9755659ff73d5355e2b7 (patch) | |
| tree | c20a7dba849030ed2c52deb4a9769768b0f087b3 /src | |
| parent | Merge pull request #2540 from ReinUsesLisp/remove-guest-position (diff) | |
| parent | vk_device: Let formats array type be deduced (diff) | |
| download | yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.tar.gz yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.tar.xz yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.zip | |
Merge pull request #2520 from ReinUsesLisp/vulkan-refresh
vk_device,vk_shader_decompiler: Miscellaneous changes
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 136 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 58 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 104 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 8 |
4 files changed, 218 insertions, 88 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 00242ecbe..3b966ddc3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { | |||
| 18 | vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; | 18 | vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; |
| 19 | constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { | 19 | constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { |
| 20 | vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; | 20 | vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; |
| 21 | constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; | ||
| 21 | 22 | ||
| 22 | } // namespace Alternatives | 23 | } // namespace Alternatives |
| 23 | 24 | ||
| @@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy | |||
| 51 | : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { | 52 | : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { |
| 52 | SetupFamilies(dldi, surface); | 53 | SetupFamilies(dldi, surface); |
| 53 | SetupProperties(dldi); | 54 | SetupProperties(dldi); |
| 55 | SetupFeatures(dldi); | ||
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | VKDevice::~VKDevice() = default; | 58 | VKDevice::~VKDevice() = default; |
| 57 | 59 | ||
| 58 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { | 60 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { |
| 59 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 61 | vk::PhysicalDeviceFeatures device_features; |
| 60 | vk::PhysicalDeviceFeatures device_features{}; | 62 | device_features.vertexPipelineStoresAndAtomics = true; |
| 63 | device_features.independentBlend = true; | ||
| 64 | device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; | ||
| 61 | 65 | ||
| 62 | const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; | 66 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 67 | const std::vector<const char*> extensions = LoadExtensions(dldi); | ||
| 63 | const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), | 68 | const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), |
| 64 | 0, nullptr, static_cast<u32>(extensions.size()), | 69 | 0, nullptr, static_cast<u32>(extensions.size()), |
| 65 | extensions.data(), &device_features); | 70 | extensions.data(), &device_features); |
| @@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
| 90 | LOG_CRITICAL(Render_Vulkan, | 95 | LOG_CRITICAL(Render_Vulkan, |
| 91 | "Format={} with usage={} and type={} has no defined alternatives and host " | 96 | "Format={} with usage={} and type={} has no defined alternatives and host " |
| 92 | "hardware does not support it", | 97 | "hardware does not support it", |
| 93 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | 98 | vk::to_string(wanted_format), vk::to_string(wanted_usage), |
| 94 | static_cast<u32>(format_type)); | 99 | static_cast<u32>(format_type)); |
| 95 | UNREACHABLE(); | 100 | UNREACHABLE(); |
| 96 | return wanted_format; | 101 | return wanted_format; |
| @@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | |||
| 118 | return wanted_format; | 123 | return wanted_format; |
| 119 | } | 124 | } |
| 120 | 125 | ||
| 126 | bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | ||
| 127 | const vk::DispatchLoaderDynamic& dldi) const { | ||
| 128 | if (!features.textureCompressionASTC_LDR) { | ||
| 129 | return false; | ||
| 130 | } | ||
| 131 | const auto format_feature_usage{ | ||
| 132 | vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | | ||
| 133 | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | | ||
| 134 | vk::FormatFeatureFlagBits::eTransferDst}; | ||
| 135 | constexpr std::array<vk::Format, 9> astc_formats = { | ||
| 136 | vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, | ||
| 137 | vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, | ||
| 138 | vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, | ||
| 139 | vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock, | ||
| 140 | vk::Format::eAstc10x8SrgbBlock}; | ||
| 141 | for (const auto format : astc_formats) { | ||
| 142 | const auto format_properties{physical.getFormatProperties(format, dldi)}; | ||
| 143 | if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | return true; | ||
| 148 | } | ||
| 149 | |||
| 121 | bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 150 | bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, |
| 122 | FormatType format_type) const { | 151 | FormatType format_type) const { |
| 123 | const auto it = format_properties.find(wanted_format); | 152 | const auto it = format_properties.find(wanted_format); |
| @@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag | |||
| 132 | 161 | ||
| 133 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 162 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, |
| 134 | vk::SurfaceKHR surface) { | 163 | vk::SurfaceKHR surface) { |
| 135 | const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; | ||
| 136 | |||
| 137 | bool has_swapchain{}; | 164 | bool has_swapchain{}; |
| 138 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 165 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 139 | has_swapchain |= prop.extensionName == swapchain_extension; | 166 | has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 140 | } | 167 | } |
| 141 | if (!has_swapchain) { | 168 | if (!has_swapchain) { |
| 142 | // The device doesn't support creating swapchains. | 169 | // The device doesn't support creating swapchains. |
| @@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 160 | } | 187 | } |
| 161 | 188 | ||
| 162 | // TODO(Rodrigo): Check if the device matches all requeriments. | 189 | // TODO(Rodrigo): Check if the device matches all requeriments. |
| 163 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | 190 | const auto properties{physical.getProperties(dldi)}; |
| 164 | if (props.limits.maxUniformBufferRange < 65536) { | 191 | const auto limits{properties.limits}; |
| 192 | if (limits.maxUniformBufferRange < 65536) { | ||
| 193 | return false; | ||
| 194 | } | ||
| 195 | |||
| 196 | const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; | ||
| 197 | if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { | ||
| 165 | return false; | 198 | return false; |
| 166 | } | 199 | } |
| 167 | 200 | ||
| @@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 169 | return true; | 202 | return true; |
| 170 | } | 203 | } |
| 171 | 204 | ||
| 205 | std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { | ||
| 206 | std::vector<const char*> extensions; | ||
| 207 | extensions.reserve(2); | ||
| 208 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||
| 209 | |||
| 210 | const auto Test = [&](const vk::ExtensionProperties& extension, | ||
| 211 | std::optional<std::reference_wrapper<bool>> status, const char* name, | ||
| 212 | u32 revision) { | ||
| 213 | if (extension.extensionName != std::string(name)) { | ||
| 214 | return; | ||
| 215 | } | ||
| 216 | extensions.push_back(name); | ||
| 217 | if (status) { | ||
| 218 | status->get() = true; | ||
| 219 | } | ||
| 220 | }; | ||
| 221 | |||
| 222 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | ||
| 223 | Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); | ||
| 224 | } | ||
| 225 | |||
| 226 | return extensions; | ||
| 227 | } | ||
| 228 | |||
| 172 | void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { | 229 | void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { |
| 173 | std::optional<u32> graphics_family_, present_family_; | 230 | std::optional<u32> graphics_family_, present_family_; |
| 174 | 231 | ||
| @@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { | |||
| 196 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); | 253 | const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); |
| 197 | device_type = props.deviceType; | 254 | device_type = props.deviceType; |
| 198 | uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); | 255 | uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); |
| 256 | max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); | ||
| 257 | } | ||
| 258 | |||
| 259 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | ||
| 260 | const auto supported_features{physical.getFeatures(dldi)}; | ||
| 261 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | ||
| 199 | } | 262 | } |
| 200 | 263 | ||
| 201 | std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | 264 | std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { |
| 202 | static const float QUEUE_PRIORITY = 1.f; | 265 | static const float QUEUE_PRIORITY = 1.0f; |
| 203 | 266 | ||
| 204 | std::set<u32> unique_queue_families = {graphics_family, present_family}; | 267 | std::set<u32> unique_queue_families = {graphics_family, present_family}; |
| 205 | std::vector<vk::DeviceQueueCreateInfo> queue_cis; | 268 | std::vector<vk::DeviceQueueCreateInfo> queue_cis; |
| @@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con | |||
| 212 | 275 | ||
| 213 | std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | 276 | std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( |
| 214 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | 277 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { |
| 278 | static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, | ||
| 279 | vk::Format::eB5G6R5UnormPack16, | ||
| 280 | vk::Format::eA2B10G10R10UnormPack32, | ||
| 281 | vk::Format::eR32G32B32A32Sfloat, | ||
| 282 | vk::Format::eR16G16Unorm, | ||
| 283 | vk::Format::eR16G16Snorm, | ||
| 284 | vk::Format::eR8G8B8A8Srgb, | ||
| 285 | vk::Format::eR8Unorm, | ||
| 286 | vk::Format::eB10G11R11UfloatPack32, | ||
| 287 | vk::Format::eR32Sfloat, | ||
| 288 | vk::Format::eR16Sfloat, | ||
| 289 | vk::Format::eR16G16B16A16Sfloat, | ||
| 290 | vk::Format::eD32Sfloat, | ||
| 291 | vk::Format::eD16Unorm, | ||
| 292 | vk::Format::eD16UnormS8Uint, | ||
| 293 | vk::Format::eD24UnormS8Uint, | ||
| 294 | vk::Format::eD32SfloatS8Uint, | ||
| 295 | vk::Format::eBc1RgbaUnormBlock, | ||
| 296 | vk::Format::eBc2UnormBlock, | ||
| 297 | vk::Format::eBc3UnormBlock, | ||
| 298 | vk::Format::eBc4UnormBlock, | ||
| 299 | vk::Format::eBc5UnormBlock, | ||
| 300 | vk::Format::eBc5SnormBlock, | ||
| 301 | vk::Format::eBc7UnormBlock, | ||
| 302 | vk::Format::eAstc4x4UnormBlock, | ||
| 303 | vk::Format::eAstc4x4SrgbBlock, | ||
| 304 | vk::Format::eAstc8x8SrgbBlock, | ||
| 305 | vk::Format::eAstc8x6SrgbBlock, | ||
| 306 | vk::Format::eAstc5x4SrgbBlock, | ||
| 307 | vk::Format::eAstc5x5UnormBlock, | ||
| 308 | vk::Format::eAstc5x5SrgbBlock, | ||
| 309 | vk::Format::eAstc10x8UnormBlock, | ||
| 310 | vk::Format::eAstc10x8SrgbBlock}; | ||
| 215 | std::map<vk::Format, vk::FormatProperties> format_properties; | 311 | std::map<vk::Format, vk::FormatProperties> format_properties; |
| 216 | 312 | for (const auto format : formats) { | |
| 217 | const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { | ||
| 218 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | 313 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); |
| 219 | }; | 314 | } |
| 220 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); | ||
| 221 | AddFormatQuery(vk::Format::eB5G6R5UnormPack16); | ||
| 222 | AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); | ||
| 223 | AddFormatQuery(vk::Format::eR8G8B8A8Srgb); | ||
| 224 | AddFormatQuery(vk::Format::eR8Unorm); | ||
| 225 | AddFormatQuery(vk::Format::eD32Sfloat); | ||
| 226 | AddFormatQuery(vk::Format::eD16Unorm); | ||
| 227 | AddFormatQuery(vk::Format::eD16UnormS8Uint); | ||
| 228 | AddFormatQuery(vk::Format::eD24UnormS8Uint); | ||
| 229 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); | ||
| 230 | AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); | ||
| 231 | AddFormatQuery(vk::Format::eBc2UnormBlock); | ||
| 232 | AddFormatQuery(vk::Format::eBc3UnormBlock); | ||
| 233 | AddFormatQuery(vk::Format::eBc4UnormBlock); | ||
| 234 | |||
| 235 | return format_properties; | 315 | return format_properties; |
| 236 | } | 316 | } |
| 237 | 317 | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index e87c7a508..537825d8b 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| 14 | /// Format usage descriptor | 14 | /// Format usage descriptor. |
| 15 | enum class FormatType { Linear, Optimal, Buffer }; | 15 | enum class FormatType { Linear, Optimal, Buffer }; |
| 16 | 16 | ||
| 17 | /// Handles data specific to a physical device. | 17 | /// Handles data specific to a physical device. |
| @@ -34,12 +34,12 @@ public: | |||
| 34 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 34 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, |
| 35 | FormatType format_type) const; | 35 | FormatType format_type) const; |
| 36 | 36 | ||
| 37 | /// Returns the dispatch loader with direct function pointers of the device | 37 | /// Returns the dispatch loader with direct function pointers of the device. |
| 38 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { | 38 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { |
| 39 | return dld; | 39 | return dld; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /// Returns the logical device | 42 | /// Returns the logical device. |
| 43 | vk::Device GetLogical() const { | 43 | vk::Device GetLogical() const { |
| 44 | return logical.get(); | 44 | return logical.get(); |
| 45 | } | 45 | } |
| @@ -69,30 +69,55 @@ public: | |||
| 69 | return present_family; | 69 | return present_family; |
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | /// Returns if the device is integrated with the host CPU | 72 | /// Returns if the device is integrated with the host CPU. |
| 73 | bool IsIntegrated() const { | 73 | bool IsIntegrated() const { |
| 74 | return device_type == vk::PhysicalDeviceType::eIntegratedGpu; | 74 | return device_type == vk::PhysicalDeviceType::eIntegratedGpu; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | /// Returns uniform buffer alignment requeriment | 77 | /// Returns uniform buffer alignment requeriment. |
| 78 | u64 GetUniformBufferAlignment() const { | 78 | u64 GetUniformBufferAlignment() const { |
| 79 | return uniform_buffer_alignment; | 79 | return uniform_buffer_alignment; |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | /// Returns the maximum range for storage buffers. | ||
| 83 | u64 GetMaxStorageBufferRange() const { | ||
| 84 | return max_storage_buffer_range; | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Returns true if ASTC is natively supported. | ||
| 88 | bool IsOptimalAstcSupported() const { | ||
| 89 | return is_optimal_astc_supported; | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | ||
| 93 | bool IsExtScalarBlockLayoutSupported() const { | ||
| 94 | return ext_scalar_block_layout; | ||
| 95 | } | ||
| 96 | |||
| 82 | /// Checks if the physical device is suitable. | 97 | /// Checks if the physical device is suitable. |
| 83 | static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 98 | static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, |
| 84 | vk::SurfaceKHR surface); | 99 | vk::SurfaceKHR surface); |
| 85 | 100 | ||
| 86 | private: | 101 | private: |
| 102 | /// Loads extensions into a vector and stores available ones in this object. | ||
| 103 | std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); | ||
| 104 | |||
| 87 | /// Sets up queue families. | 105 | /// Sets up queue families. |
| 88 | void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); | 106 | void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); |
| 89 | 107 | ||
| 90 | /// Sets up device properties. | 108 | /// Sets up device properties. |
| 91 | void SetupProperties(const vk::DispatchLoaderDynamic& dldi); | 109 | void SetupProperties(const vk::DispatchLoaderDynamic& dldi); |
| 92 | 110 | ||
| 111 | /// Sets up device features. | ||
| 112 | void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); | ||
| 113 | |||
| 93 | /// Returns a list of queue initialization descriptors. | 114 | /// Returns a list of queue initialization descriptors. |
| 94 | std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | 115 | std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; |
| 95 | 116 | ||
| 117 | /// Returns true if ASTC textures are natively supported. | ||
| 118 | bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | ||
| 119 | const vk::DispatchLoaderDynamic& dldi) const; | ||
| 120 | |||
| 96 | /// Returns true if a format is supported. | 121 | /// Returns true if a format is supported. |
| 97 | bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 122 | bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, |
| 98 | FormatType format_type) const; | 123 | FormatType format_type) const; |
| @@ -101,16 +126,19 @@ private: | |||
| 101 | static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( | 126 | static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 102 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 127 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 103 | 128 | ||
| 104 | const vk::PhysicalDevice physical; ///< Physical device | 129 | const vk::PhysicalDevice physical; ///< Physical device. |
| 105 | vk::DispatchLoaderDynamic dld; ///< Device function pointers | 130 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 106 | UniqueDevice logical; ///< Logical device | 131 | UniqueDevice logical; ///< Logical device. |
| 107 | vk::Queue graphics_queue; ///< Main graphics queue | 132 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 108 | vk::Queue present_queue; ///< Main present queue | 133 | vk::Queue present_queue; ///< Main present queue. |
| 109 | u32 graphics_family{}; ///< Main graphics queue family index | 134 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 110 | u32 present_family{}; ///< Main present queue family index | 135 | u32 present_family{}; ///< Main present queue family index. |
| 111 | vk::PhysicalDeviceType device_type; ///< Physical device type | 136 | vk::PhysicalDeviceType device_type; ///< Physical device type. |
| 112 | u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment | 137 | u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. |
| 113 | std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary | 138 | u64 max_storage_buffer_range{}; ///< Max storage buffer size. |
| 139 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | ||
| 140 | bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. | ||
| 141 | std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. | ||
| 114 | }; | 142 | }; |
| 115 | 143 | ||
| 116 | } // namespace Vulkan | 144 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a5b25aeff..a85fcae5a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/engines/shader_bytecode.h" | 18 | #include "video_core/engines/shader_bytecode.h" |
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 21 | #include "video_core/shader/shader_ir.h" | 22 | #include "video_core/shader/shader_ir.h" |
| 22 | 23 | ||
| @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | |||
| 33 | using Operation = const OperationNode&; | 34 | using Operation = const OperationNode&; |
| 34 | 35 | ||
| 35 | // TODO(Rodrigo): Use rasterizer's value | 36 | // TODO(Rodrigo): Use rasterizer's value |
| 36 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; | 37 | constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; |
| 38 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; | ||
| 37 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; | 39 | constexpr u32 STAGE_BINDING_STRIDE = 0x100; |
| 38 | 40 | ||
| 39 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 41 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { | |||
| 87 | 89 | ||
| 88 | class SPIRVDecompiler : public Sirit::Module { | 90 | class SPIRVDecompiler : public Sirit::Module { |
| 89 | public: | 91 | public: |
| 90 | explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) | 92 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) |
| 91 | : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { | 93 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { |
| 92 | AddCapability(spv::Capability::Shader); | 94 | AddCapability(spv::Capability::Shader); |
| 93 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | 95 | AddExtension("SPV_KHR_storage_buffer_storage_class"); |
| 94 | AddExtension("SPV_KHR_variable_pointers"); | 96 | AddExtension("SPV_KHR_variable_pointers"); |
| @@ -195,7 +197,9 @@ public: | |||
| 195 | entries.samplers.emplace_back(sampler); | 197 | entries.samplers.emplace_back(sampler); |
| 196 | } | 198 | } |
| 197 | for (const auto& attribute : ir.GetInputAttributes()) { | 199 | for (const auto& attribute : ir.GetInputAttributes()) { |
| 198 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 200 | if (IsGenericAttribute(attribute)) { |
| 201 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | ||
| 202 | } | ||
| 199 | } | 203 | } |
| 200 | entries.clip_distances = ir.GetClipDistances(); | 204 | entries.clip_distances = ir.GetClipDistances(); |
| 201 | entries.shader_length = ir.GetLength(); | 205 | entries.shader_length = ir.GetLength(); |
| @@ -210,7 +214,6 @@ private: | |||
| 210 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | 214 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 211 | 215 | ||
| 212 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); | 216 | static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); |
| 213 | static constexpr u32 CBUF_STRIDE = 16; | ||
| 214 | 217 | ||
| 215 | void AllocateBindings() { | 218 | void AllocateBindings() { |
| 216 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; | 219 | const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; |
| @@ -315,6 +318,7 @@ private: | |||
| 315 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", | 318 | constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", |
| 316 | "overflow"}; | 319 | "overflow"}; |
| 317 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { | 320 | for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { |
| 321 | const auto flag_code = static_cast<InternalFlag>(flag); | ||
| 318 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | 322 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); |
| 319 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); | 323 | internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); |
| 320 | } | 324 | } |
| @@ -374,7 +378,9 @@ private: | |||
| 374 | u32 binding = const_buffers_base_binding; | 378 | u32 binding = const_buffers_base_binding; |
| 375 | for (const auto& entry : ir.GetConstantBuffers()) { | 379 | for (const auto& entry : ir.GetConstantBuffers()) { |
| 376 | const auto [index, size] = entry; | 380 | const auto [index, size] = entry; |
| 377 | const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); | 381 | const Id type = |
| 382 | device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; | ||
| 383 | const Id id = OpVariable(type, spv::StorageClass::Uniform); | ||
| 378 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); | 384 | AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); |
| 379 | 385 | ||
| 380 | Decorate(id, spv::Decoration::Binding, binding++); | 386 | Decorate(id, spv::Decoration::Binding, binding++); |
| @@ -569,33 +575,35 @@ private: | |||
| 569 | const Node offset = cbuf->GetOffset(); | 575 | const Node offset = cbuf->GetOffset(); |
| 570 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); | 576 | const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); |
| 571 | 577 | ||
| 572 | Id buffer_index{}; | 578 | Id pointer{}; |
| 573 | Id buffer_element{}; | 579 | if (device.IsExtScalarBlockLayoutSupported()) { |
| 574 | 580 | const Id buffer_offset = Emit(OpShiftRightLogical( | |
| 575 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | 581 | t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); |
| 576 | // Direct access | 582 | pointer = Emit( |
| 577 | const u32 offset_imm = immediate->GetValue(); | 583 | OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); |
| 578 | ASSERT(offset_imm % 4 == 0); | ||
| 579 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 580 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 581 | |||
| 582 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 583 | // Indirect access | ||
| 584 | // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which | ||
| 585 | // emits sub-optimal code on GLSL from my testing). | ||
| 586 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 587 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 588 | const Id final_offset = Emit( | ||
| 589 | OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 590 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 591 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 592 | |||
| 593 | } else { | 584 | } else { |
| 594 | UNREACHABLE_MSG("Unmanaged offset node type"); | 585 | Id buffer_index{}; |
| 586 | Id buffer_element{}; | ||
| 587 | if (const auto immediate = std::get_if<ImmediateNode>(offset)) { | ||
| 588 | // Direct access | ||
| 589 | const u32 offset_imm = immediate->GetValue(); | ||
| 590 | ASSERT(offset_imm % 4 == 0); | ||
| 591 | buffer_index = Constant(t_uint, offset_imm / 16); | ||
| 592 | buffer_element = Constant(t_uint, (offset_imm / 4) % 4); | ||
| 593 | } else if (std::holds_alternative<OperationNode>(*offset)) { | ||
| 594 | // Indirect access | ||
| 595 | const Id offset_id = BitcastTo<Type::Uint>(Visit(offset)); | ||
| 596 | const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); | ||
| 597 | const Id final_offset = Emit(OpUMod( | ||
| 598 | t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); | ||
| 599 | buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 600 | buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); | ||
| 601 | } else { | ||
| 602 | UNREACHABLE_MSG("Unmanaged offset node type"); | ||
| 603 | } | ||
| 604 | pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 605 | buffer_index, buffer_element)); | ||
| 595 | } | 606 | } |
| 596 | |||
| 597 | const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), | ||
| 598 | buffer_index, buffer_element)); | ||
| 599 | return Emit(OpLoad(t_float, pointer)); | 607 | return Emit(OpLoad(t_float, pointer)); |
| 600 | 608 | ||
| 601 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { | 609 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { |
| @@ -612,7 +620,9 @@ private: | |||
| 612 | // It's invalid to call conditional on nested nodes, use an operation instead | 620 | // It's invalid to call conditional on nested nodes, use an operation instead |
| 613 | const Id true_label = OpLabel(); | 621 | const Id true_label = OpLabel(); |
| 614 | const Id skip_label = OpLabel(); | 622 | const Id skip_label = OpLabel(); |
| 615 | Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); | 623 | const Id condition = Visit(conditional->GetCondition()); |
| 624 | Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); | ||
| 625 | Emit(OpBranchConditional(condition, true_label, skip_label)); | ||
| 616 | Emit(true_label); | 626 | Emit(true_label); |
| 617 | 627 | ||
| 618 | VisitBasicBlock(conditional->GetCode()); | 628 | VisitBasicBlock(conditional->GetCode()); |
| @@ -968,11 +978,11 @@ private: | |||
| 968 | case ShaderStage::Vertex: { | 978 | case ShaderStage::Vertex: { |
| 969 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't | 979 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't |
| 970 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. | 980 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. |
| 971 | const Id position = AccessElement(t_float4, per_vertex, position_index); | 981 | const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); |
| 972 | Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); | 982 | Id depth = Emit(OpLoad(t_float, z_pointer)); |
| 973 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); | 983 | depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); |
| 974 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); | 984 | depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); |
| 975 | Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); | 985 | Emit(OpStore(z_pointer, depth)); |
| 976 | break; | 986 | break; |
| 977 | } | 987 | } |
| 978 | case ShaderStage::Fragment: { | 988 | case ShaderStage::Fragment: { |
| @@ -1311,6 +1321,7 @@ private: | |||
| 1311 | &SPIRVDecompiler::WorkGroupId<2>, | 1321 | &SPIRVDecompiler::WorkGroupId<2>, |
| 1312 | }; | 1322 | }; |
| 1313 | 1323 | ||
| 1324 | const VKDevice& device; | ||
| 1314 | const ShaderIR& ir; | 1325 | const ShaderIR& ir; |
| 1315 | const ShaderStage stage; | 1326 | const ShaderStage stage; |
| 1316 | const Tegra::Shader::Header header; | 1327 | const Tegra::Shader::Header header; |
| @@ -1349,12 +1360,18 @@ private: | |||
| 1349 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); | 1360 | const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); |
| 1350 | 1361 | ||
| 1351 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); | 1362 | const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); |
| 1352 | const Id t_cbuf_array = | 1363 | const Id t_cbuf_std140 = Decorate( |
| 1353 | Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), | 1364 | Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), |
| 1354 | spv::Decoration::ArrayStride, CBUF_STRIDE); | 1365 | spv::Decoration::ArrayStride, 16u); |
| 1355 | const Id t_cbuf_struct = MemberDecorate( | 1366 | const Id t_cbuf_scalar = Decorate( |
| 1356 | Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 1367 | Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), |
| 1357 | const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); | 1368 | spv::Decoration::ArrayStride, 4u); |
| 1369 | const Id t_cbuf_std140_struct = MemberDecorate( | ||
| 1370 | Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1371 | const Id t_cbuf_scalar_struct = MemberDecorate( | ||
| 1372 | Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||
| 1373 | const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); | ||
| 1374 | const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); | ||
| 1358 | 1375 | ||
| 1359 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | 1376 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); |
| 1360 | const Id t_gmem_array = | 1377 | const Id t_gmem_array = |
| @@ -1403,8 +1420,9 @@ private: | |||
| 1403 | std::map<u32, Id> labels; | 1420 | std::map<u32, Id> labels; |
| 1404 | }; | 1421 | }; |
| 1405 | 1422 | ||
| 1406 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { | 1423 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 1407 | auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); | 1424 | Maxwell::ShaderStage stage) { |
| 1425 | auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); | ||
| 1408 | decompiler->Decompile(); | 1426 | decompiler->Decompile(); |
| 1409 | return {std::move(decompiler), decompiler->GetShaderEntries()}; | 1427 | return {std::move(decompiler), decompiler->GetShaderEntries()}; |
| 1410 | } | 1428 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { | |||
| 20 | class ShaderIR; | 20 | class ShaderIR; |
| 21 | } | 21 | } |
| 22 | 22 | ||
| 23 | namespace Vulkan { | ||
| 24 | class VKDevice; | ||
| 25 | } | ||
| 26 | |||
| 23 | namespace Vulkan::VKShader { | 27 | namespace Vulkan::VKShader { |
| 24 | 28 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 29 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 26 | |||
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 30 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 28 | 31 | ||
| 29 | constexpr u32 DESCRIPTOR_SET = 0; | 32 | constexpr u32 DESCRIPTOR_SET = 0; |
| @@ -75,6 +78,7 @@ struct ShaderEntries { | |||
| 75 | 78 | ||
| 76 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; | 79 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; |
| 77 | 80 | ||
| 78 | DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); | 81 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 82 | Maxwell::ShaderStage stage); | ||
| 79 | 83 | ||
| 80 | } // namespace Vulkan::VKShader | 84 | } // namespace Vulkan::VKShader |