summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2019-06-05 18:10:00 -0400
committer: GitHub, 2019-06-05 18:10:00 -0400
commit: a20ba09bfde185d8911f9755659ff73d5355e2b7 (patch)
tree: c20a7dba849030ed2c52deb4a9769768b0f087b3 /src
parent: Merge pull request #2540 from ReinUsesLisp/remove-guest-position (diff)
parent: vk_device: Let formats array type be deduced (diff)
download: yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.tar.gz
download: yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.tar.xz
download: yuzu-a20ba09bfde185d8911f9755659ff73d5355e2b7.zip
Merge pull request #2520 from ReinUsesLisp/vulkan-refresh
vk_device,vk_shader_decompiler: Miscellaneous changes
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp 136
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h 58
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 104
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.h 8
4 files changed, 218 insertions, 88 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 00242ecbe..3b966ddc3 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
18 vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; 18 vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
19constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { 19constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
20 vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; 20 vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
21constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
21 22
22} // namespace Alternatives 23} // namespace Alternatives
23 24
@@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
51 : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { 52 : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
52 SetupFamilies(dldi, surface); 53 SetupFamilies(dldi, surface);
53 SetupProperties(dldi); 54 SetupProperties(dldi);
55 SetupFeatures(dldi);
54} 56}
55 57
56VKDevice::~VKDevice() = default; 58VKDevice::~VKDevice() = default;
57 59
58bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { 60bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
59 const auto queue_cis = GetDeviceQueueCreateInfos(); 61 vk::PhysicalDeviceFeatures device_features;
60 vk::PhysicalDeviceFeatures device_features{}; 62 device_features.vertexPipelineStoresAndAtomics = true;
63 device_features.independentBlend = true;
64 device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
61 65
62 const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; 66 const auto queue_cis = GetDeviceQueueCreateInfos();
67 const std::vector<const char*> extensions = LoadExtensions(dldi);
63 const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 68 const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
64 0, nullptr, static_cast<u32>(extensions.size()), 69 0, nullptr, static_cast<u32>(extensions.size()),
65 extensions.data(), &device_features); 70 extensions.data(), &device_features);
@@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
90 LOG_CRITICAL(Render_Vulkan, 95 LOG_CRITICAL(Render_Vulkan,
91 "Format={} with usage={} and type={} has no defined alternatives and host " 96 "Format={} with usage={} and type={} has no defined alternatives and host "
92 "hardware does not support it", 97 "hardware does not support it",
93 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), 98 vk::to_string(wanted_format), vk::to_string(wanted_usage),
94 static_cast<u32>(format_type)); 99 static_cast<u32>(format_type));
95 UNREACHABLE(); 100 UNREACHABLE();
96 return wanted_format; 101 return wanted_format;
@@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
118 return wanted_format; 123 return wanted_format;
119} 124}
120 125
126bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
127 const vk::DispatchLoaderDynamic& dldi) const {
128 if (!features.textureCompressionASTC_LDR) {
129 return false;
130 }
131 const auto format_feature_usage{
132 vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
133 vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
134 vk::FormatFeatureFlagBits::eTransferDst};
135 constexpr std::array<vk::Format, 9> astc_formats = {
136 vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
137 vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
138 vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
139 vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock,
140 vk::Format::eAstc10x8SrgbBlock};
141 for (const auto format : astc_formats) {
142 const auto format_properties{physical.getFormatProperties(format, dldi)};
143 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
144 return false;
145 }
146 }
147 return true;
148}
149
121bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, 150bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
122 FormatType format_type) const { 151 FormatType format_type) const {
123 const auto it = format_properties.find(wanted_format); 152 const auto it = format_properties.find(wanted_format);
@@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
132 161
133bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 162bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
134 vk::SurfaceKHR surface) { 163 vk::SurfaceKHR surface) {
135 const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
136
137 bool has_swapchain{}; 164 bool has_swapchain{};
138 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 165 for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
139 has_swapchain |= prop.extensionName == swapchain_extension; 166 has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
140 } 167 }
141 if (!has_swapchain) { 168 if (!has_swapchain) {
142 // The device doesn't support creating swapchains. 169 // The device doesn't support creating swapchains.
@@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
160 } 187 }
161 188
162 // TODO(Rodrigo): Check if the device matches all requeriments. 189 // TODO(Rodrigo): Check if the device matches all requeriments.
163 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); 190 const auto properties{physical.getProperties(dldi)};
164 if (props.limits.maxUniformBufferRange < 65536) { 191 const auto limits{properties.limits};
192 if (limits.maxUniformBufferRange < 65536) {
193 return false;
194 }
195
196 const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
197 if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
165 return false; 198 return false;
166 } 199 }
167 200
@@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
169 return true; 202 return true;
170} 203}
171 204
205std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
206 std::vector<const char*> extensions;
207 extensions.reserve(2);
208 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
209
210 const auto Test = [&](const vk::ExtensionProperties& extension,
211 std::optional<std::reference_wrapper<bool>> status, const char* name,
212 u32 revision) {
213 if (extension.extensionName != std::string(name)) {
214 return;
215 }
216 extensions.push_back(name);
217 if (status) {
218 status->get() = true;
219 }
220 };
221
222 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
223 Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
224 }
225
226 return extensions;
227}
228
172void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { 229void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
173 std::optional<u32> graphics_family_, present_family_; 230 std::optional<u32> graphics_family_, present_family_;
174 231
@@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
196 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); 253 const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
197 device_type = props.deviceType; 254 device_type = props.deviceType;
198 uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); 255 uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
256 max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
257}
258
259void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
260 const auto supported_features{physical.getFeatures(dldi)};
261 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
199} 262}
200 263
201std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { 264std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
202 static const float QUEUE_PRIORITY = 1.f; 265 static const float QUEUE_PRIORITY = 1.0f;
203 266
204 std::set<u32> unique_queue_families = {graphics_family, present_family}; 267 std::set<u32> unique_queue_families = {graphics_family, present_family};
205 std::vector<vk::DeviceQueueCreateInfo> queue_cis; 268 std::vector<vk::DeviceQueueCreateInfo> queue_cis;
@@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
212 275
213std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( 276std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
214 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { 277 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
278 static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
279 vk::Format::eB5G6R5UnormPack16,
280 vk::Format::eA2B10G10R10UnormPack32,
281 vk::Format::eR32G32B32A32Sfloat,
282 vk::Format::eR16G16Unorm,
283 vk::Format::eR16G16Snorm,
284 vk::Format::eR8G8B8A8Srgb,
285 vk::Format::eR8Unorm,
286 vk::Format::eB10G11R11UfloatPack32,
287 vk::Format::eR32Sfloat,
288 vk::Format::eR16Sfloat,
289 vk::Format::eR16G16B16A16Sfloat,
290 vk::Format::eD32Sfloat,
291 vk::Format::eD16Unorm,
292 vk::Format::eD16UnormS8Uint,
293 vk::Format::eD24UnormS8Uint,
294 vk::Format::eD32SfloatS8Uint,
295 vk::Format::eBc1RgbaUnormBlock,
296 vk::Format::eBc2UnormBlock,
297 vk::Format::eBc3UnormBlock,
298 vk::Format::eBc4UnormBlock,
299 vk::Format::eBc5UnormBlock,
300 vk::Format::eBc5SnormBlock,
301 vk::Format::eBc7UnormBlock,
302 vk::Format::eAstc4x4UnormBlock,
303 vk::Format::eAstc4x4SrgbBlock,
304 vk::Format::eAstc8x8SrgbBlock,
305 vk::Format::eAstc8x6SrgbBlock,
306 vk::Format::eAstc5x4SrgbBlock,
307 vk::Format::eAstc5x5UnormBlock,
308 vk::Format::eAstc5x5SrgbBlock,
309 vk::Format::eAstc10x8UnormBlock,
310 vk::Format::eAstc10x8SrgbBlock};
215 std::map<vk::Format, vk::FormatProperties> format_properties; 311 std::map<vk::Format, vk::FormatProperties> format_properties;
216 312 for (const auto format : formats) {
217 const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
218 format_properties.emplace(format, physical.getFormatProperties(format, dldi)); 313 format_properties.emplace(format, physical.getFormatProperties(format, dldi));
219 }; 314 }
220 AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
221 AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
222 AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
223 AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
224 AddFormatQuery(vk::Format::eR8Unorm);
225 AddFormatQuery(vk::Format::eD32Sfloat);
226 AddFormatQuery(vk::Format::eD16Unorm);
227 AddFormatQuery(vk::Format::eD16UnormS8Uint);
228 AddFormatQuery(vk::Format::eD24UnormS8Uint);
229 AddFormatQuery(vk::Format::eD32SfloatS8Uint);
230 AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
231 AddFormatQuery(vk::Format::eBc2UnormBlock);
232 AddFormatQuery(vk::Format::eBc3UnormBlock);
233 AddFormatQuery(vk::Format::eBc4UnormBlock);
234
235 return format_properties; 315 return format_properties;
236} 316}
237 317
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index e87c7a508..537825d8b 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -11,7 +11,7 @@
11 11
12namespace Vulkan { 12namespace Vulkan {
13 13
14/// Format usage descriptor 14/// Format usage descriptor.
15enum class FormatType { Linear, Optimal, Buffer }; 15enum class FormatType { Linear, Optimal, Buffer };
16 16
17/// Handles data specific to a physical device. 17/// Handles data specific to a physical device.
@@ -34,12 +34,12 @@ public:
34 vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, 34 vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
35 FormatType format_type) const; 35 FormatType format_type) const;
36 36
37 /// Returns the dispatch loader with direct function pointers of the device 37 /// Returns the dispatch loader with direct function pointers of the device.
38 const vk::DispatchLoaderDynamic& GetDispatchLoader() const { 38 const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
39 return dld; 39 return dld;
40 } 40 }
41 41
42 /// Returns the logical device 42 /// Returns the logical device.
43 vk::Device GetLogical() const { 43 vk::Device GetLogical() const {
44 return logical.get(); 44 return logical.get();
45 } 45 }
@@ -69,30 +69,55 @@ public:
69 return present_family; 69 return present_family;
70 } 70 }
71 71
72 /// Returns if the device is integrated with the host CPU 72 /// Returns if the device is integrated with the host CPU.
73 bool IsIntegrated() const { 73 bool IsIntegrated() const {
74 return device_type == vk::PhysicalDeviceType::eIntegratedGpu; 74 return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
75 } 75 }
76 76
77 /// Returns uniform buffer alignment requeriment 77 /// Returns uniform buffer alignment requeriment.
78 u64 GetUniformBufferAlignment() const { 78 u64 GetUniformBufferAlignment() const {
79 return uniform_buffer_alignment; 79 return uniform_buffer_alignment;
80 } 80 }
81 81
82 /// Returns the maximum range for storage buffers.
83 u64 GetMaxStorageBufferRange() const {
84 return max_storage_buffer_range;
85 }
86
87 /// Returns true if ASTC is natively supported.
88 bool IsOptimalAstcSupported() const {
89 return is_optimal_astc_supported;
90 }
91
92 /// Returns true if the device supports VK_EXT_scalar_block_layout.
93 bool IsExtScalarBlockLayoutSupported() const {
94 return ext_scalar_block_layout;
95 }
96
82 /// Checks if the physical device is suitable. 97 /// Checks if the physical device is suitable.
83 static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 98 static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
84 vk::SurfaceKHR surface); 99 vk::SurfaceKHR surface);
85 100
86private: 101private:
102 /// Loads extensions into a vector and stores available ones in this object.
103 std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
104
87 /// Sets up queue families. 105 /// Sets up queue families.
88 void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); 106 void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
89 107
90 /// Sets up device properties. 108 /// Sets up device properties.
91 void SetupProperties(const vk::DispatchLoaderDynamic& dldi); 109 void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
92 110
111 /// Sets up device features.
112 void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
113
93 /// Returns a list of queue initialization descriptors. 114 /// Returns a list of queue initialization descriptors.
94 std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 115 std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
95 116
117 /// Returns true if ASTC textures are natively supported.
118 bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
119 const vk::DispatchLoaderDynamic& dldi) const;
120
96 /// Returns true if a format is supported. 121 /// Returns true if a format is supported.
97 bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, 122 bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
98 FormatType format_type) const; 123 FormatType format_type) const;
@@ -101,16 +126,19 @@ private:
101 static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( 126 static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
102 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 127 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
103 128
104 const vk::PhysicalDevice physical; ///< Physical device 129 const vk::PhysicalDevice physical; ///< Physical device.
105 vk::DispatchLoaderDynamic dld; ///< Device function pointers 130 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
106 UniqueDevice logical; ///< Logical device 131 UniqueDevice logical; ///< Logical device.
107 vk::Queue graphics_queue; ///< Main graphics queue 132 vk::Queue graphics_queue; ///< Main graphics queue.
108 vk::Queue present_queue; ///< Main present queue 133 vk::Queue present_queue; ///< Main present queue.
109 u32 graphics_family{}; ///< Main graphics queue family index 134 u32 graphics_family{}; ///< Main graphics queue family index.
110 u32 present_family{}; ///< Main present queue family index 135 u32 present_family{}; ///< Main present queue family index.
111 vk::PhysicalDeviceType device_type; ///< Physical device type 136 vk::PhysicalDeviceType device_type; ///< Physical device type.
112 u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment 137 u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
113 std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary 138 u64 max_storage_buffer_range{}; ///< Max storage buffer size.
139 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
140 bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
141 std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
114}; 142};
115 143
116} // namespace Vulkan 144} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a5b25aeff..a85fcae5a 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -17,6 +17,7 @@
17#include "video_core/engines/maxwell_3d.h" 17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/engines/shader_bytecode.h" 18#include "video_core/engines/shader_bytecode.h"
19#include "video_core/engines/shader_header.h" 19#include "video_core/engines/shader_header.h"
20#include "video_core/renderer_vulkan/vk_device.h"
20#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 21#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
21#include "video_core/shader/shader_ir.h" 22#include "video_core/shader/shader_ir.h"
22 23
@@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
33using Operation = const OperationNode&; 34using Operation = const OperationNode&;
34 35
35// TODO(Rodrigo): Use rasterizer's value 36// TODO(Rodrigo): Use rasterizer's value
36constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; 37constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000;
38constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4;
37constexpr u32 STAGE_BINDING_STRIDE = 0x100; 39constexpr u32 STAGE_BINDING_STRIDE = 0x100;
38 40
39enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 41enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) {
87 89
88class SPIRVDecompiler : public Sirit::Module { 90class SPIRVDecompiler : public Sirit::Module {
89public: 91public:
90 explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) 92 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
91 : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { 93 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
92 AddCapability(spv::Capability::Shader); 94 AddCapability(spv::Capability::Shader);
93 AddExtension("SPV_KHR_storage_buffer_storage_class"); 95 AddExtension("SPV_KHR_storage_buffer_storage_class");
94 AddExtension("SPV_KHR_variable_pointers"); 96 AddExtension("SPV_KHR_variable_pointers");
@@ -195,7 +197,9 @@ public:
195 entries.samplers.emplace_back(sampler); 197 entries.samplers.emplace_back(sampler);
196 } 198 }
197 for (const auto& attribute : ir.GetInputAttributes()) { 199 for (const auto& attribute : ir.GetInputAttributes()) {
198 entries.attributes.insert(GetGenericAttributeLocation(attribute)); 200 if (IsGenericAttribute(attribute)) {
201 entries.attributes.insert(GetGenericAttributeLocation(attribute));
202 }
199 } 203 }
200 entries.clip_distances = ir.GetClipDistances(); 204 entries.clip_distances = ir.GetClipDistances();
201 entries.shader_length = ir.GetLength(); 205 entries.shader_length = ir.GetLength();
@@ -210,7 +214,6 @@ private:
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; 214 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211 215
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 216 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 static constexpr u32 CBUF_STRIDE = 16;
214 217
215 void AllocateBindings() { 218 void AllocateBindings() {
216 const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE; 219 const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
@@ -315,6 +318,7 @@ private:
315 constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry", 318 constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
316 "overflow"}; 319 "overflow"};
317 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { 320 for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
321 const auto flag_code = static_cast<InternalFlag>(flag);
318 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); 322 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
319 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); 323 internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
320 } 324 }
@@ -374,7 +378,9 @@ private:
374 u32 binding = const_buffers_base_binding; 378 u32 binding = const_buffers_base_binding;
375 for (const auto& entry : ir.GetConstantBuffers()) { 379 for (const auto& entry : ir.GetConstantBuffers()) {
376 const auto [index, size] = entry; 380 const auto [index, size] = entry;
377 const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); 381 const Id type =
382 device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
383 const Id id = OpVariable(type, spv::StorageClass::Uniform);
378 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); 384 AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
379 385
380 Decorate(id, spv::Decoration::Binding, binding++); 386 Decorate(id, spv::Decoration::Binding, binding++);
@@ -569,33 +575,35 @@ private:
569 const Node offset = cbuf->GetOffset(); 575 const Node offset = cbuf->GetOffset();
570 const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); 576 const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
571 577
572 Id buffer_index{}; 578 Id pointer{};
573 Id buffer_element{}; 579 if (device.IsExtScalarBlockLayoutSupported()) {
574 580 const Id buffer_offset = Emit(OpShiftRightLogical(
575 if (const auto immediate = std::get_if<ImmediateNode>(offset)) { 581 t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
576 // Direct access 582 pointer = Emit(
577 const u32 offset_imm = immediate->GetValue(); 583 OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset));
578 ASSERT(offset_imm % 4 == 0);
579 buffer_index = Constant(t_uint, offset_imm / 16);
580 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
581
582 } else if (std::holds_alternative<OperationNode>(*offset)) {
583 // Indirect access
584 // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
585 // emits sub-optimal code on GLSL from my testing).
586 const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
587 const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
588 const Id final_offset = Emit(
589 OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
590 buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
591 buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
592
593 } else { 584 } else {
594 UNREACHABLE_MSG("Unmanaged offset node type"); 585 Id buffer_index{};
586 Id buffer_element{};
587 if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
588 // Direct access
589 const u32 offset_imm = immediate->GetValue();
590 ASSERT(offset_imm % 4 == 0);
591 buffer_index = Constant(t_uint, offset_imm / 16);
592 buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
593 } else if (std::holds_alternative<OperationNode>(*offset)) {
594 // Indirect access
595 const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
596 const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
597 const Id final_offset = Emit(OpUMod(
598 t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
599 buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
600 buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
601 } else {
602 UNREACHABLE_MSG("Unmanaged offset node type");
603 }
604 pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
605 buffer_index, buffer_element));
595 } 606 }
596
597 const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
598 buffer_index, buffer_element));
599 return Emit(OpLoad(t_float, pointer)); 607 return Emit(OpLoad(t_float, pointer));
600 608
601 } else if (const auto gmem = std::get_if<GmemNode>(node)) { 609 } else if (const auto gmem = std::get_if<GmemNode>(node)) {
@@ -612,7 +620,9 @@ private:
612 // It's invalid to call conditional on nested nodes, use an operation instead 620 // It's invalid to call conditional on nested nodes, use an operation instead
613 const Id true_label = OpLabel(); 621 const Id true_label = OpLabel();
614 const Id skip_label = OpLabel(); 622 const Id skip_label = OpLabel();
615 Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); 623 const Id condition = Visit(conditional->GetCondition());
624 Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
625 Emit(OpBranchConditional(condition, true_label, skip_label));
616 Emit(true_label); 626 Emit(true_label);
617 627
618 VisitBasicBlock(conditional->GetCode()); 628 VisitBasicBlock(conditional->GetCode());
@@ -968,11 +978,11 @@ private:
968 case ShaderStage::Vertex: { 978 case ShaderStage::Vertex: {
969 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't 979 // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
970 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. 980 // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
971 const Id position = AccessElement(t_float4, per_vertex, position_index); 981 const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
972 Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); 982 Id depth = Emit(OpLoad(t_float, z_pointer));
973 depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); 983 depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
974 depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); 984 depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
975 Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); 985 Emit(OpStore(z_pointer, depth));
976 break; 986 break;
977 } 987 }
978 case ShaderStage::Fragment: { 988 case ShaderStage::Fragment: {
@@ -1311,6 +1321,7 @@ private:
1311 &SPIRVDecompiler::WorkGroupId<2>, 1321 &SPIRVDecompiler::WorkGroupId<2>,
1312 }; 1322 };
1313 1323
1324 const VKDevice& device;
1314 const ShaderIR& ir; 1325 const ShaderIR& ir;
1315 const ShaderStage stage; 1326 const ShaderStage stage;
1316 const Tegra::Shader::Header header; 1327 const Tegra::Shader::Header header;
@@ -1349,12 +1360,18 @@ private:
1349 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); 1360 const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
1350 1361
1351 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); 1362 const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
1352 const Id t_cbuf_array = 1363 const Id t_cbuf_std140 = Decorate(
1353 Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), 1364 Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"),
1354 spv::Decoration::ArrayStride, CBUF_STRIDE); 1365 spv::Decoration::ArrayStride, 16u);
1355 const Id t_cbuf_struct = MemberDecorate( 1366 const Id t_cbuf_scalar = Decorate(
1356 Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); 1367 Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"),
1357 const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); 1368 spv::Decoration::ArrayStride, 4u);
1369 const Id t_cbuf_std140_struct = MemberDecorate(
1370 Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
1371 const Id t_cbuf_scalar_struct = MemberDecorate(
1372 Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
1373 const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
1374 const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
1358 1375
1359 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); 1376 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
1360 const Id t_gmem_array = 1377 const Id t_gmem_array =
@@ -1403,8 +1420,9 @@ private:
1403 std::map<u32, Id> labels; 1420 std::map<u32, Id> labels;
1404}; 1421};
1405 1422
1406DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { 1423DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
1407 auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage); 1424 Maxwell::ShaderStage stage) {
1425 auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
1408 decompiler->Decompile(); 1426 decompiler->Decompile();
1409 return {std::move(decompiler), decompiler->GetShaderEntries()}; 1427 return {std::move(decompiler), decompiler->GetShaderEntries()};
1410} 1428}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 329d8fa38..f90541cc1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -20,10 +20,13 @@ namespace VideoCommon::Shader {
20class ShaderIR; 20class ShaderIR;
21} 21}
22 22
23namespace Vulkan {
24class VKDevice;
25}
26
23namespace Vulkan::VKShader { 27namespace Vulkan::VKShader {
24 28
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 29using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26
27using SamplerEntry = VideoCommon::Shader::Sampler; 30using SamplerEntry = VideoCommon::Shader::Sampler;
28 31
29constexpr u32 DESCRIPTOR_SET = 0; 32constexpr u32 DESCRIPTOR_SET = 0;
@@ -75,6 +78,7 @@ struct ShaderEntries {
75 78
76using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; 79using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
77 80
78DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); 81DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
82 Maxwell::ShaderStage stage);
79 83
80} // namespace Vulkan::VKShader 84} // namespace Vulkan::VKShader