diff options
| author | 2020-03-14 09:48:15 -0400 | |
|---|---|---|
| committer | 2020-03-14 09:48:15 -0400 | |
| commit | 35145bd529c3517e2c366efc764a762092d96edf (patch) | |
| tree | 58c80a2133092b990ca11f3a357d70fab2c5fd0b /src/video_core/renderer_vulkan | |
| parent | Merge pull request #3473 from ReinUsesLisp/shader-purge (diff) | |
| parent | vk/gl_shader_decompiler: Silence assertion on compute (diff) | |
| download | yuzu-35145bd529c3517e2c366efc764a762092d96edf.tar.gz yuzu-35145bd529c3517e2c366efc764a762092d96edf.tar.xz yuzu-35145bd529c3517e2c366efc764a762092d96edf.zip | |
Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
Diffstat (limited to 'src/video_core/renderer_vulkan')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 45 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 138 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.h | 13 |
8 files changed, 236 insertions, 74 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b9..3847bd722 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 107 | features.occlusionQueryPrecise = true; | 107 | features.occlusionQueryPrecise = true; |
| 108 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 109 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 110 | features.shaderStorageImageReadWithoutFormat = | 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | is_shader_storage_img_read_without_format_supported; | ||
| 112 | features.shaderStorageImageWriteWithoutFormat = true; | 111 | features.shaderStorageImageWriteWithoutFormat = true; |
| 113 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 114 | 113 | ||
| @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 148 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | ||
| 151 | if (ext_transform_feedback) { | ||
| 152 | transform_feedback.transformFeedback = true; | ||
| 153 | transform_feedback.geometryStreams = true; | ||
| 154 | SetNext(next, transform_feedback); | ||
| 155 | } else { | ||
| 156 | LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); | ||
| 157 | } | ||
| 158 | |||
| 151 | if (!ext_depth_range_unrestricted) { | 159 | if (!ext_depth_range_unrestricted) { |
| 152 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 153 | } | 161 | } |
| @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 385 | } | 393 | } |
| 386 | }; | 394 | }; |
| 387 | 395 | ||
| 388 | extensions.reserve(14); | 396 | extensions.reserve(15); |
| 389 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 390 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 391 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 397 | 405 | ||
| 398 | [[maybe_unused]] const bool nsight = | 406 | [[maybe_unused]] const bool nsight = |
| 399 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 400 | bool khr_shader_float16_int8{}; | 408 | bool has_khr_shader_float16_int8{}; |
| 401 | bool ext_subgroup_size_control{}; | 409 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | ||
| 402 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 403 | Test(extension, khr_uniform_buffer_standard_layout, | 412 | Test(extension, khr_uniform_buffer_standard_layout, |
| 404 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 405 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| 415 | false); | ||
| 406 | Test(extension, ext_depth_range_unrestricted, | 416 | Test(extension, ext_depth_range_unrestricted, |
| 407 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 417 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 408 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 418 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 409 | Test(extension, ext_shader_viewport_index_layer, | 419 | Test(extension, ext_shader_viewport_index_layer, |
| 410 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 420 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 411 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 421 | Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 422 | false); | ||
| 423 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | ||
| 412 | false); | 424 | false); |
| 413 | if (Settings::values.renderer_debug) { | 425 | if (Settings::values.renderer_debug) { |
| 414 | Test(extension, nv_device_diagnostic_checkpoints, | 426 | Test(extension, nv_device_diagnostic_checkpoints, |
| @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 416 | } | 428 | } |
| 417 | } | 429 | } |
| 418 | 430 | ||
| 419 | if (khr_shader_float16_int8) { | 431 | if (has_khr_shader_float16_int8) { |
| 420 | is_float16_supported = | 432 | is_float16_supported = |
| 421 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; |
| 422 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 423 | } | 435 | } |
| 424 | 436 | ||
| 425 | if (ext_subgroup_size_control) { | 437 | if (has_ext_subgroup_size_control) { |
| 426 | const auto features = | 438 | const auto features = |
| 427 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); |
| 428 | const auto properties = | 440 | const auto properties = |
| @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 439 | is_warp_potentially_bigger = true; | 451 | is_warp_potentially_bigger = true; |
| 440 | } | 452 | } |
| 441 | 453 | ||
| 454 | if (has_ext_transform_feedback) { | ||
| 455 | const auto features = | ||
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | ||
| 457 | const auto properties = | ||
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | ||
| 459 | |||
| 460 | if (features.transformFeedback && features.geometryStreams && | ||
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | ||
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | ||
| 464 | ext_transform_feedback = true; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 442 | return extensions; | 468 | return extensions; |
| 443 | } | 469 | } |
| 444 | 470 | ||
| @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 467 | 493 | ||
| 468 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { |
| 469 | const auto supported_features{physical.getFeatures(dldi)}; | 495 | const auto supported_features{physical.getFeatures(dldi)}; |
| 470 | is_shader_storage_img_read_without_format_supported = | 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 471 | supported_features.shaderStorageImageReadWithoutFormat; | ||
| 472 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); |
| 473 | } | 498 | } |
| 474 | 499 | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,11 +122,6 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns true if Shader storage Image Read Without Format supported. | ||
| 126 | bool IsShaderStorageImageReadWithoutFormatSupported() const { | ||
| 127 | return is_shader_storage_img_read_without_format_supported; | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Returns true if ASTC is natively supported. | 125 | /// Returns true if ASTC is natively supported. |
| 131 | bool IsOptimalAstcSupported() const { | 126 | bool IsOptimalAstcSupported() const { |
| 132 | return is_optimal_astc_supported; | 127 | return is_optimal_astc_supported; |
| @@ -147,6 +142,11 @@ public: | |||
| 147 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 145 | /// Returns true if formatless image load is supported. | ||
| 146 | bool IsFormatlessImageLoadSupported() const { | ||
| 147 | return is_formatless_image_load_supported; | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. |
| 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 152 | return khr_uniform_buffer_standard_layout; | 152 | return khr_uniform_buffer_standard_layout; |
| @@ -167,6 +167,11 @@ public: | |||
| 167 | return ext_shader_viewport_index_layer; | 167 | return ext_shader_viewport_index_layer; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | /// Returns true if the device supports VK_EXT_transform_feedback. | ||
| 171 | bool IsExtTransformFeedbackSupported() const { | ||
| 172 | return ext_transform_feedback; | ||
| 173 | } | ||
| 174 | |||
| 170 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | 175 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. |
| 171 | bool IsNvDeviceDiagnosticCheckpoints() const { | 176 | bool IsNvDeviceDiagnosticCheckpoints() const { |
| 172 | return nv_device_diagnostic_checkpoints; | 177 | return nv_device_diagnostic_checkpoints; |
| @@ -214,26 +219,26 @@ private: | |||
| 214 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 216 | 221 | ||
| 217 | const vk::PhysicalDevice physical; ///< Physical device. | 222 | const vk::PhysicalDevice physical; ///< Physical device. |
| 218 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 219 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. |
| 220 | UniqueDevice logical; ///< Logical device. | 225 | UniqueDevice logical; ///< Logical device. |
| 221 | vk::Queue graphics_queue; ///< Main graphics queue. | 226 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 222 | vk::Queue present_queue; ///< Main present queue. | 227 | vk::Queue present_queue; ///< Main present queue. |
| 223 | u32 graphics_family{}; ///< Main graphics queue family index. | 228 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 224 | u32 present_family{}; ///< Main present queue family index. | 229 | u32 present_family{}; ///< Main present queue family index. |
| 225 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 226 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed |
| 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 228 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 233 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | ||
| 230 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 231 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 238 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 239 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 240 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | ||
| 234 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 241 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. |
| 235 | bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage | ||
| 236 | ///< image read without format | ||
| 237 | 242 | ||
| 238 | // Telemetry parameters | 243 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 244 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ebf85f311..056ef495c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 273 | specialization.workgroup_size = key.workgroup_size; | 273 | specialization.workgroup_size = key.workgroup_size; |
| 274 | specialization.shared_memory_size = key.shared_memory_size; | 274 | specialization.shared_memory_size = key.shared_memory_size; |
| 275 | 275 | ||
| 276 | const SPIRVShader spirv_shader{ | 276 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |
| 277 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | 277 | shader->GetRegistry(), specialization), |
| 278 | shader->GetEntries()}; | 278 | shader->GetEntries()}; |
| 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |
| 280 | update_descriptor_queue, spirv_shader); | 280 | update_descriptor_queue, spirv_shader); |
| 281 | return *entry; | 281 | return *entry; |
| @@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 324 | const auto& gpu = system.GPU().Maxwell3D(); | 324 | const auto& gpu = system.GPU().Maxwell3D(); |
| 325 | 325 | ||
| 326 | Specialization specialization; | 326 | Specialization specialization; |
| 327 | specialization.primitive_topology = fixed_state.input_assembly.topology; | 327 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { |
| 328 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 329 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 328 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); |
| 330 | specialization.point_size = fixed_state.input_assembly.point_size; | 329 | specialization.point_size = fixed_state.input_assembly.point_size; |
| 331 | } | 330 | } |
| @@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 333 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 332 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; |
| 334 | } | 333 | } |
| 335 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 334 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 336 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 337 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 338 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 339 | 335 | ||
| 340 | SPIRVProgram program; | 336 | SPIRVProgram program; |
| 341 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 337 | std::vector<vk::DescriptorSetLayoutBinding> bindings; |
| @@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 356 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 357 | const auto program_type = GetShaderType(program_enum); | 353 | const auto program_type = GetShaderType(program_enum); |
| 358 | const auto& entries = shader->GetEntries(); | 354 | const auto& entries = shader->GetEntries(); |
| 359 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | 355 | program[stage] = { |
| 360 | entries}; | 356 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| 357 | entries}; | ||
| 361 | 358 | ||
| 362 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | 359 | if (program_enum == Maxwell::ShaderProgram::VertexA) { |
| 363 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 360 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e292526bb..21340c9a4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -132,6 +132,10 @@ public: | |||
| 132 | return shader_ir; | 132 | return shader_ir; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 136 | return registry; | ||
| 137 | } | ||
| 138 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | 139 | const VideoCommon::Shader::ShaderIR& GetIR() const { |
| 136 | return shader_ir; | 140 | return shader_ir; |
| 137 | } | 141 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b56..f889019c1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | BeginTransformFeedback(); | ||
| 351 | |||
| 350 | const auto pipeline_layout = pipeline.GetLayout(); | 352 | const auto pipeline_layout = pipeline.GetLayout(); |
| 351 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 353 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 352 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 354 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { |
| @@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 356 | } | 358 | } |
| 357 | draw_params.Draw(cmdbuf, dld); | 359 | draw_params.Draw(cmdbuf, dld); |
| 358 | }); | 360 | }); |
| 361 | |||
| 362 | EndTransformFeedback(); | ||
| 359 | } | 363 | } |
| 360 | 364 | ||
| 361 | void RasterizerVulkan::Clear() { | 365 | void RasterizerVulkan::Clear() { |
| @@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 738 | UpdateStencilFaces(regs); | 742 | UpdateStencilFaces(regs); |
| 739 | } | 743 | } |
| 740 | 744 | ||
| 745 | void RasterizerVulkan::BeginTransformFeedback() { | ||
| 746 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 747 | if (regs.tfb_enabled == 0) { | ||
| 748 | return; | ||
| 749 | } | ||
| 750 | |||
| 751 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 752 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 753 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 754 | |||
| 755 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||
| 756 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 757 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 758 | |||
| 759 | const auto& binding = regs.tfb_bindings[0]; | ||
| 760 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 761 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 762 | |||
| 763 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 764 | const std::size_t size = binding.buffer_size; | ||
| 765 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 766 | |||
| 767 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | ||
| 768 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | ||
| 769 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | ||
| 770 | }); | ||
| 771 | } | ||
| 772 | |||
| 773 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 774 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 775 | if (regs.tfb_enabled == 0) { | ||
| 776 | return; | ||
| 777 | } | ||
| 778 | |||
| 779 | scheduler.Record( | ||
| 780 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | ||
| 781 | } | ||
| 782 | |||
| 741 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 783 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| 742 | BufferBindings& buffer_bindings) { | 784 | BufferBindings& buffer_bindings) { |
| 743 | const auto& regs = system.GPU().Maxwell3D().regs; | 785 | const auto& regs = system.GPU().Maxwell3D().regs; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a..b2e73d98d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -169,6 +169,10 @@ private: | |||
| 169 | 169 | ||
| 170 | void UpdateDynamicStates(); | 170 | void UpdateDynamicStates(); |
| 171 | 171 | ||
| 172 | void BeginTransformFeedback(); | ||
| 173 | |||
| 174 | void EndTransformFeedback(); | ||
| 175 | |||
| 172 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | 176 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); |
| 173 | 177 | ||
| 174 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 178 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af0..b2c298051 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <functional> | 5 | #include <functional> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | ||
| 8 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | ||
| 9 | #include <utility> | 11 | #include <utility> |
| 10 | 12 | ||
| 11 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| @@ -24,6 +26,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 25 | #include "video_core/shader/node.h" | 27 | #include "video_core/shader/node.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 28 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | ||
| 27 | 30 | ||
| 28 | namespace Vulkan { | 31 | namespace Vulkan { |
| 29 | 32 | ||
| @@ -93,6 +96,12 @@ struct VertexIndices { | |||
| 93 | std::optional<u32> clip_distances; | 96 | std::optional<u32> clip_distances; |
| 94 | }; | 97 | }; |
| 95 | 98 | ||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 96 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { |
| 97 | ASSERT(!sampler.IsBuffer()); | 106 | ASSERT(!sampler.IsBuffer()); |
| 98 | switch (sampler.GetType()) { | 107 | switch (sampler.GetType()) { |
| @@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) { | |||
| 266 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 267 | public: | 276 | public: |
| 268 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, | 277 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, |
| 269 | const Specialization& specialization) | 278 | const Registry& registry, const Specialization& specialization) |
| 270 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, | 279 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, |
| 271 | specialization{specialization} { | 280 | registry{registry}, specialization{specialization} { |
| 281 | if (stage != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 272 | AddCapability(spv::Capability::Shader); | 285 | AddCapability(spv::Capability::Shader); |
| 273 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); |
| 274 | AddCapability(spv::Capability::ImageQuery); | 287 | AddCapability(spv::Capability::ImageQuery); |
| @@ -286,6 +299,15 @@ public: | |||
| 286 | AddExtension("SPV_KHR_variable_pointers"); | 299 | AddExtension("SPV_KHR_variable_pointers"); |
| 287 | AddExtension("SPV_KHR_shader_draw_parameters"); | 300 | AddExtension("SPV_KHR_shader_draw_parameters"); |
| 288 | 301 | ||
| 302 | if (!transform_feedback.empty()) { | ||
| 303 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 304 | AddCapability(spv::Capability::TransformFeedback); | ||
| 305 | } else { | ||
| 306 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 307 | "supported on this device"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 289 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { |
| 290 | if (ir.UsesViewportIndex()) { | 312 | if (ir.UsesViewportIndex()) { |
| 291 | AddCapability(spv::Capability::MultiViewport); | 313 | AddCapability(spv::Capability::MultiViewport); |
| @@ -296,7 +318,7 @@ public: | |||
| 296 | } | 318 | } |
| 297 | } | 319 | } |
| 298 | 320 | ||
| 299 | if (device.IsShaderStorageImageReadWithoutFormatSupported()) { | 321 | if (device.IsFormatlessImageLoadSupported()) { |
| 300 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 322 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 301 | } | 323 | } |
| 302 | 324 | ||
| @@ -318,25 +340,29 @@ public: | |||
| 318 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 340 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 319 | header.common2.threads_per_input_primitive); | 341 | header.common2.threads_per_input_primitive); |
| 320 | break; | 342 | break; |
| 321 | case ShaderType::TesselationEval: | 343 | case ShaderType::TesselationEval: { |
| 344 | const auto& info = registry.GetGraphicsInfo(); | ||
| 322 | AddCapability(spv::Capability::Tessellation); | 345 | AddCapability(spv::Capability::Tessellation); |
| 323 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | 346 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); |
| 324 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); | 347 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); |
| 325 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); | 348 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); |
| 326 | AddExecutionMode(main, specialization.tessellation.clockwise | 349 | AddExecutionMode(main, info.tessellation_clockwise |
| 327 | ? spv::ExecutionMode::VertexOrderCw | 350 | ? spv::ExecutionMode::VertexOrderCw |
| 328 | : spv::ExecutionMode::VertexOrderCcw); | 351 | : spv::ExecutionMode::VertexOrderCcw); |
| 329 | break; | 352 | break; |
| 330 | case ShaderType::Geometry: | 353 | } |
| 354 | case ShaderType::Geometry: { | ||
| 355 | const auto& info = registry.GetGraphicsInfo(); | ||
| 331 | AddCapability(spv::Capability::Geometry); | 356 | AddCapability(spv::Capability::Geometry); |
| 332 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | 357 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); |
| 333 | AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); | 358 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); |
| 334 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | 359 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); |
| 335 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 360 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 336 | header.common4.max_output_vertices); | 361 | header.common4.max_output_vertices); |
| 337 | // TODO(Rodrigo): Where can we get this info from? | 362 | // TODO(Rodrigo): Where can we get this info from? |
| 338 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | 363 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); |
| 339 | break; | 364 | break; |
| 365 | } | ||
| 340 | case ShaderType::Fragment: | 366 | case ShaderType::Fragment: |
| 341 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | 367 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); |
| 342 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | 368 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); |
| @@ -545,7 +571,8 @@ private: | |||
| 545 | if (stage != ShaderType::Geometry) { | 571 | if (stage != ShaderType::Geometry) { |
| 546 | return; | 572 | return; |
| 547 | } | 573 | } |
| 548 | const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 574 | const auto& info = registry.GetGraphicsInfo(); |
| 575 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 549 | DeclareInputVertexArray(num_input); | 576 | DeclareInputVertexArray(num_input); |
| 550 | DeclareOutputVertex(); | 577 | DeclareOutputVertex(); |
| 551 | } | 578 | } |
| @@ -742,12 +769,34 @@ private: | |||
| 742 | } | 769 | } |
| 743 | 770 | ||
| 744 | void DeclareOutputAttributes() { | 771 | void DeclareOutputAttributes() { |
| 772 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | |||
| 776 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 745 | for (const auto index : ir.GetOutputAttributes()) { | 777 | for (const auto index : ir.GetOutputAttributes()) { |
| 746 | if (!IsGenericAttribute(index)) { | 778 | if (!IsGenericAttribute(index)) { |
| 747 | continue; | 779 | continue; |
| 748 | } | 780 | } |
| 749 | const u32 location = GetGenericAttributeLocation(index); | 781 | DeclareOutputAttribute(index); |
| 750 | Id type = t_float4; | 782 | } |
| 783 | } | ||
| 784 | |||
| 785 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 786 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 787 | |||
| 788 | const u32 location = GetGenericAttributeLocation(index); | ||
| 789 | u8 element = 0; | ||
| 790 | while (element < 4) { | ||
| 791 | const std::size_t remainder = 4 - element; | ||
| 792 | |||
| 793 | std::size_t num_components = remainder; | ||
| 794 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 795 | if (tfb) { | ||
| 796 | num_components = tfb->components; | ||
| 797 | } | ||
| 798 | |||
| 799 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 751 | Id varying_default = v_varying_default; | 800 | Id varying_default = v_varying_default; |
| 752 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 753 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| @@ -760,13 +809,45 @@ private: | |||
| 760 | } | 809 | } |
| 761 | type = TypePointer(spv::StorageClass::Output, type); | 810 | type = TypePointer(spv::StorageClass::Output, type); |
| 762 | 811 | ||
| 812 | std::string name = fmt::format("out_attr{}", location); | ||
| 813 | if (num_components < 4 || element > 0) { | ||
| 814 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 815 | } | ||
| 816 | |||
| 763 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | 817 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); |
| 764 | Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); | 818 | Name(AddGlobalVariable(id), name); |
| 765 | output_attributes.emplace(index, id); | 819 | |
| 820 | GenericVaryingDescription description; | ||
| 821 | description.id = id; | ||
| 822 | description.first_element = element; | ||
| 823 | description.is_scalar = num_components == 1; | ||
| 824 | for (u32 i = 0; i < num_components; ++i) { | ||
| 825 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 826 | output_attributes.emplace(offset, description); | ||
| 827 | } | ||
| 766 | interfaces.push_back(id); | 828 | interfaces.push_back(id); |
| 767 | 829 | ||
| 768 | Decorate(id, spv::Decoration::Location, location); | 830 | Decorate(id, spv::Decoration::Location, location); |
| 831 | if (element > 0) { | ||
| 832 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 833 | } | ||
| 834 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 835 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 836 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 837 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 838 | } | ||
| 839 | |||
| 840 | element += static_cast<u8>(num_components); | ||
| 841 | } | ||
| 842 | } | ||
| 843 | |||
| 844 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 845 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 846 | const auto it = transform_feedback.find(location); | ||
| 847 | if (it == transform_feedback.end()) { | ||
| 848 | return {}; | ||
| 769 | } | 849 | } |
| 850 | return it->second; | ||
| 770 | } | 851 | } |
| 771 | 852 | ||
| 772 | u32 DeclareConstantBuffers(u32 binding) { | 853 | u32 DeclareConstantBuffers(u32 binding) { |
| @@ -898,7 +979,7 @@ private: | |||
| 898 | u32 GetNumInputVertices() const { | 979 | u32 GetNumInputVertices() const { |
| 899 | switch (stage) { | 980 | switch (stage) { |
| 900 | case ShaderType::Geometry: | 981 | case ShaderType::Geometry: |
| 901 | return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 982 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); |
| 902 | case ShaderType::TesselationControl: | 983 | case ShaderType::TesselationControl: |
| 903 | case ShaderType::TesselationEval: | 984 | case ShaderType::TesselationEval: |
| 904 | return NumInputPatches; | 985 | return NumInputPatches; |
| @@ -1346,8 +1427,14 @@ private: | |||
| 1346 | } | 1427 | } |
| 1347 | default: | 1428 | default: |
| 1348 | if (IsGenericAttribute(attribute)) { | 1429 | if (IsGenericAttribute(attribute)) { |
| 1349 | const Id composite = output_attributes.at(attribute); | 1430 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); |
| 1350 | return {ArrayPass(t_out_float, composite, {element}), Type::Float}; | 1431 | const GenericVaryingDescription description = output_attributes.at(offset); |
| 1432 | const Id composite = description.id; | ||
| 1433 | std::vector<u32> indices; | ||
| 1434 | if (!description.is_scalar) { | ||
| 1435 | indices.push_back(element - description.first_element); | ||
| 1436 | } | ||
| 1437 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1351 | } | 1438 | } |
| 1352 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | 1439 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 1353 | static_cast<u32>(attribute)); | 1440 | static_cast<u32>(attribute)); |
| @@ -1793,7 +1880,7 @@ private: | |||
| 1793 | } | 1880 | } |
| 1794 | 1881 | ||
| 1795 | Expression ImageLoad(Operation operation) { | 1882 | Expression ImageLoad(Operation operation) { |
| 1796 | if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { | 1883 | if (!device.IsFormatlessImageLoadSupported()) { |
| 1797 | return {v_float_zero, Type::Float}; | 1884 | return {v_float_zero, Type::Float}; |
| 1798 | } | 1885 | } |
| 1799 | 1886 | ||
| @@ -2258,11 +2345,11 @@ private: | |||
| 2258 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | 2345 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { |
| 2259 | switch (type) { | 2346 | switch (type) { |
| 2260 | case Type::Float: | 2347 | case Type::Float: |
| 2261 | return {nullptr, t_float2, t_float3, t_float4}; | 2348 | return {t_float, t_float2, t_float3, t_float4}; |
| 2262 | case Type::Int: | 2349 | case Type::Int: |
| 2263 | return {nullptr, t_int2, t_int3, t_int4}; | 2350 | return {t_int, t_int2, t_int3, t_int4}; |
| 2264 | case Type::Uint: | 2351 | case Type::Uint: |
| 2265 | return {nullptr, t_uint2, t_uint3, t_uint4}; | 2352 | return {t_uint, t_uint2, t_uint3, t_uint4}; |
| 2266 | default: | 2353 | default: |
| 2267 | UNIMPLEMENTED(); | 2354 | UNIMPLEMENTED(); |
| 2268 | return {}; | 2355 | return {}; |
| @@ -2495,7 +2582,9 @@ private: | |||
| 2495 | const ShaderIR& ir; | 2582 | const ShaderIR& ir; |
| 2496 | const ShaderType stage; | 2583 | const ShaderType stage; |
| 2497 | const Tegra::Shader::Header header; | 2584 | const Tegra::Shader::Header header; |
| 2585 | const Registry& registry; | ||
| 2498 | const Specialization& specialization; | 2586 | const Specialization& specialization; |
| 2587 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2499 | 2588 | ||
| 2500 | const Id t_void = Name(TypeVoid(), "void"); | 2589 | const Id t_void = Name(TypeVoid(), "void"); |
| 2501 | 2590 | ||
| @@ -2584,7 +2673,7 @@ private: | |||
| 2584 | Id shared_memory{}; | 2673 | Id shared_memory{}; |
| 2585 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | 2674 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; |
| 2586 | std::map<Attribute::Index, Id> input_attributes; | 2675 | std::map<Attribute::Index, Id> input_attributes; |
| 2587 | std::map<Attribute::Index, Id> output_attributes; | 2676 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2588 | std::map<u32, Id> constant_buffers; | 2677 | std::map<u32, Id> constant_buffers; |
| 2589 | std::map<GlobalMemoryBase, Id> global_buffers; | 2678 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2590 | std::map<u32, TexelBuffer> texel_buffers; | 2679 | std::map<u32, TexelBuffer> texel_buffers; |
| @@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2870 | } | 2959 | } |
| 2871 | 2960 | ||
| 2872 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 2961 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 2873 | ShaderType stage, const Specialization& specialization) { | 2962 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 2874 | return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); | 2963 | const Specialization& specialization) { |
| 2964 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 2875 | } | 2965 | } |
| 2876 | 2966 | ||
| 2877 | } // namespace Vulkan | 2967 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 18 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | 19 | #include "video_core/shader/shader_ir.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -91,17 +92,9 @@ struct Specialization final { | |||
| 91 | u32 shared_memory_size{}; | 92 | u32 shared_memory_size{}; |
| 92 | 93 | ||
| 93 | // Graphics specific | 94 | // Graphics specific |
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 98 | |||
| 99 | // Tessellation specific | ||
| 100 | struct { | ||
| 101 | Maxwell::TessellationPrimitive primitive{}; | ||
| 102 | Maxwell::TessellationSpacing spacing{}; | ||
| 103 | bool clockwise{}; | ||
| 104 | } tessellation; | ||
| 105 | }; | 98 | }; |
| 106 | // Old gcc versions don't consider this trivially copyable. | 99 | // Old gcc versions don't consider this trivially copyable. |
| 107 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 100 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
| @@ -114,6 +107,8 @@ struct SPIRVShader { | |||
| 114 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 107 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 115 | 108 | ||
| 116 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 109 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 117 | Tegra::Engines::ShaderType stage, const Specialization& specialization); | 110 | Tegra::Engines::ShaderType stage, |
| 111 | const VideoCommon::Shader::Registry& registry, | ||
| 112 | const Specialization& specialization); | ||
| 118 | 113 | ||
| 119 | } // namespace Vulkan | 114 | } // namespace Vulkan |