summaryrefslogtreecommitdiff
path: root/src/video_core/renderer_vulkan
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2020-03-14 09:48:15 -0400
committerGravatar GitHub2020-03-14 09:48:15 -0400
commit35145bd529c3517e2c366efc764a762092d96edf (patch)
tree58c80a2133092b990ca11f3a357d70fab2c5fd0b /src/video_core/renderer_vulkan
parentMerge pull request #3473 from ReinUsesLisp/shader-purge (diff)
parentvk/gl_shader_decompiler: Silence assertion on compute (diff)
downloadyuzu-35145bd529c3517e2c366efc764a762092d96edf.tar.gz
yuzu-35145bd529c3517e2c366efc764a762092d96edf.tar.xz
yuzu-35145bd529c3517e2c366efc764a762092d96edf.zip
Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
Diffstat (limited to 'src/video_core/renderer_vulkan')
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp47
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h45
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp42
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp138
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h13
8 files changed, 236 insertions, 74 deletions
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..3847bd722 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
107 features.occlusionQueryPrecise = true; 107 features.occlusionQueryPrecise = true;
108 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
109 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
110 features.shaderStorageImageReadWithoutFormat = 110 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
111 is_shader_storage_img_read_without_format_supported;
112 features.shaderStorageImageWriteWithoutFormat = true; 111 features.shaderStorageImageWriteWithoutFormat = true;
113 features.textureCompressionASTC_LDR = is_optimal_astc_supported; 112 features.textureCompressionASTC_LDR = is_optimal_astc_supported;
114 113
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
148 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); 147 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
149 } 148 }
150 149
150 vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
151 if (ext_transform_feedback) {
152 transform_feedback.transformFeedback = true;
153 transform_feedback.geometryStreams = true;
154 SetNext(next, transform_feedback);
155 } else {
156 LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
157 }
158
151 if (!ext_depth_range_unrestricted) { 159 if (!ext_depth_range_unrestricted) {
152 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 160 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
153 } 161 }
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
385 } 393 }
386 }; 394 };
387 395
388 extensions.reserve(14); 396 extensions.reserve(15);
389 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 397 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
390 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 398 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
391 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 399 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
397 405
398 [[maybe_unused]] const bool nsight = 406 [[maybe_unused]] const bool nsight =
399 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 407 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
400 bool khr_shader_float16_int8{}; 408 bool has_khr_shader_float16_int8{};
401 bool ext_subgroup_size_control{}; 409 bool has_ext_subgroup_size_control{};
410 bool has_ext_transform_feedback{};
402 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { 411 for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
403 Test(extension, khr_uniform_buffer_standard_layout, 412 Test(extension, khr_uniform_buffer_standard_layout,
404 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); 413 VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
405 Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); 414 Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
415 false);
406 Test(extension, ext_depth_range_unrestricted, 416 Test(extension, ext_depth_range_unrestricted,
407 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); 417 VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
408 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); 418 Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
409 Test(extension, ext_shader_viewport_index_layer, 419 Test(extension, ext_shader_viewport_index_layer,
410 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); 420 VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
411 Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, 421 Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
422 false);
423 Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
412 false); 424 false);
413 if (Settings::values.renderer_debug) { 425 if (Settings::values.renderer_debug) {
414 Test(extension, nv_device_diagnostic_checkpoints, 426 Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
416 } 428 }
417 } 429 }
418 430
419 if (khr_shader_float16_int8) { 431 if (has_khr_shader_float16_int8) {
420 is_float16_supported = 432 is_float16_supported =
421 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; 433 GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
422 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); 434 extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
423 } 435 }
424 436
425 if (ext_subgroup_size_control) { 437 if (has_ext_subgroup_size_control) {
426 const auto features = 438 const auto features =
427 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); 439 GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
428 const auto properties = 440 const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
439 is_warp_potentially_bigger = true; 451 is_warp_potentially_bigger = true;
440 } 452 }
441 453
454 if (has_ext_transform_feedback) {
455 const auto features =
456 GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
457 const auto properties =
458 GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
459
460 if (features.transformFeedback && features.geometryStreams &&
461 properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
462 properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
463 extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
464 ext_transform_feedback = true;
465 }
466 }
467
442 return extensions; 468 return extensions;
443} 469}
444 470
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
467 493
468void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { 494void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
469 const auto supported_features{physical.getFeatures(dldi)}; 495 const auto supported_features{physical.getFeatures(dldi)};
470 is_shader_storage_img_read_without_format_supported = 496 is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
471 supported_features.shaderStorageImageReadWithoutFormat;
472 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); 497 is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
473} 498}
474 499
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns true if Shader storage Image Read Without Format supported.
126 bool IsShaderStorageImageReadWithoutFormatSupported() const {
127 return is_shader_storage_img_read_without_format_supported;
128 }
129
130 /// Returns true if ASTC is natively supported. 125 /// Returns true if ASTC is natively supported.
131 bool IsOptimalAstcSupported() const { 126 bool IsOptimalAstcSupported() const {
132 return is_optimal_astc_supported; 127 return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
147 return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; 142 return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
148 } 143 }
149 144
145 /// Returns true if formatless image load is supported.
146 bool IsFormatlessImageLoadSupported() const {
147 return is_formatless_image_load_supported;
148 }
149
150 /// Returns true if the device supports VK_EXT_scalar_block_layout. 150 /// Returns true if the device supports VK_EXT_scalar_block_layout.
151 bool IsKhrUniformBufferStandardLayoutSupported() const { 151 bool IsKhrUniformBufferStandardLayoutSupported() const {
152 return khr_uniform_buffer_standard_layout; 152 return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
167 return ext_shader_viewport_index_layer; 167 return ext_shader_viewport_index_layer;
168 } 168 }
169 169
170 /// Returns true if the device supports VK_EXT_transform_feedback.
171 bool IsExtTransformFeedbackSupported() const {
172 return ext_transform_feedback;
173 }
174
170 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. 175 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
171 bool IsNvDeviceDiagnosticCheckpoints() const { 176 bool IsNvDeviceDiagnosticCheckpoints() const {
172 return nv_device_diagnostic_checkpoints; 177 return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
214 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
215 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
216 221
217 const vk::PhysicalDevice physical; ///< Physical device. 222 const vk::PhysicalDevice physical; ///< Physical device.
218 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 223 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
219 vk::PhysicalDeviceProperties properties; ///< Device properties. 224 vk::PhysicalDeviceProperties properties; ///< Device properties.
220 UniqueDevice logical; ///< Logical device. 225 UniqueDevice logical; ///< Logical device.
221 vk::Queue graphics_queue; ///< Main graphics queue. 226 vk::Queue graphics_queue; ///< Main graphics queue.
222 vk::Queue present_queue; ///< Main present queue. 227 vk::Queue present_queue; ///< Main present queue.
223 u32 graphics_family{}; ///< Main graphics queue family index. 228 u32 graphics_family{}; ///< Main graphics queue family index.
224 u32 present_family{}; ///< Main present queue family index. 229 u32 present_family{}; ///< Main present queue family index.
225 vk::DriverIdKHR driver_id{}; ///< Driver ID. 230 vk::DriverIdKHR driver_id{}; ///< Driver ID.
226 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 231 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
227 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 232 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
228 bool is_float16_supported{}; ///< Support for float16 arithmetics. 233 bool is_float16_supported{}; ///< Support for float16 arithmetics.
229 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 234 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
235 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
230 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 236 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
231 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 237 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 238 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 239 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
240 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
234 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 241 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
235 bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
236 ///< image read without format
237 242
238 // Telemetry parameters 243 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 244 std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ebf85f311..056ef495c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
273 specialization.workgroup_size = key.workgroup_size; 273 specialization.workgroup_size = key.workgroup_size;
274 specialization.shared_memory_size = key.shared_memory_size; 274 specialization.shared_memory_size = key.shared_memory_size;
275 275
276 const SPIRVShader spirv_shader{ 276 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
277 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), 277 shader->GetRegistry(), specialization),
278 shader->GetEntries()}; 278 shader->GetEntries()};
279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, 279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
280 update_descriptor_queue, spirv_shader); 280 update_descriptor_queue, spirv_shader);
281 return *entry; 281 return *entry;
@@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
324 const auto& gpu = system.GPU().Maxwell3D(); 324 const auto& gpu = system.GPU().Maxwell3D();
325 325
326 Specialization specialization; 326 Specialization specialization;
327 specialization.primitive_topology = fixed_state.input_assembly.topology; 327 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
328 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
329 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 328 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
330 specialization.point_size = fixed_state.input_assembly.point_size; 329 specialization.point_size = fixed_state.input_assembly.point_size;
331 } 330 }
@@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
333 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 332 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
334 } 333 }
335 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 334 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
336 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
337 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
338 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
339 335
340 SPIRVProgram program; 336 SPIRVProgram program;
341 std::vector<vk::DescriptorSetLayoutBinding> bindings; 337 std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
356 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
357 const auto program_type = GetShaderType(program_enum); 353 const auto program_type = GetShaderType(program_enum);
358 const auto& entries = shader->GetEntries(); 354 const auto& entries = shader->GetEntries();
359 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), 355 program[stage] = {
360 entries}; 356 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
357 entries};
361 358
362 if (program_enum == Maxwell::ShaderProgram::VertexA) { 359 if (program_enum == Maxwell::ShaderProgram::VertexA) {
363 // VertexB was combined with VertexA, so we skip the VertexB iteration 360 // VertexB was combined with VertexA, so we skip the VertexB iteration
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e292526bb..21340c9a4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -132,6 +132,10 @@ public:
132 return shader_ir; 132 return shader_ir;
133 } 133 }
134 134
135 const VideoCommon::Shader::Registry& GetRegistry() const {
136 return registry;
137 }
138
135 const VideoCommon::Shader::ShaderIR& GetIR() const { 139 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir; 140 return shader_ir;
137 } 141 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2bcb17b56..f889019c1 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
347 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 347 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
348 } 348 }
349 349
350 BeginTransformFeedback();
351
350 const auto pipeline_layout = pipeline.GetLayout(); 352 const auto pipeline_layout = pipeline.GetLayout();
351 const auto descriptor_set = pipeline.CommitDescriptorSet(); 353 const auto descriptor_set = pipeline.CommitDescriptorSet();
352 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 354 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
356 } 358 }
357 draw_params.Draw(cmdbuf, dld); 359 draw_params.Draw(cmdbuf, dld);
358 }); 360 });
361
362 EndTransformFeedback();
359} 363}
360 364
361void RasterizerVulkan::Clear() { 365void RasterizerVulkan::Clear() {
@@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
738 UpdateStencilFaces(regs); 742 UpdateStencilFaces(regs);
739} 743}
740 744
745void RasterizerVulkan::BeginTransformFeedback() {
746 const auto& regs = system.GPU().Maxwell3D().regs;
747 if (regs.tfb_enabled == 0) {
748 return;
749 }
750
751 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
752 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
753 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
754
755 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
756 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
757 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
758
759 const auto& binding = regs.tfb_bindings[0];
760 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
761 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
762
763 const GPUVAddr gpu_addr = binding.Address();
764 const std::size_t size = binding.buffer_size;
765 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
766
767 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
768 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
769 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
770 });
771}
772
773void RasterizerVulkan::EndTransformFeedback() {
774 const auto& regs = system.GPU().Maxwell3D().regs;
775 if (regs.tfb_enabled == 0) {
776 return;
777 }
778
779 scheduler.Record(
780 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
781}
782
741void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 783void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
742 BufferBindings& buffer_bindings) { 784 BufferBindings& buffer_bindings) {
743 const auto& regs = system.GPU().Maxwell3D().regs; 785 const auto& regs = system.GPU().Maxwell3D().regs;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 96ea05f0a..b2e73d98d 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -169,6 +169,10 @@ private:
169 169
170 void UpdateDynamicStates(); 170 void UpdateDynamicStates();
171 171
172 void BeginTransformFeedback();
173
174 void EndTransformFeedback();
175
172 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); 176 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
173 177
174 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 178 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cfcca5af0..b2c298051 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <functional> 5#include <functional>
6#include <limits> 6#include <limits>
7#include <map> 7#include <map>
8#include <optional>
8#include <type_traits> 9#include <type_traits>
10#include <unordered_map>
9#include <utility> 11#include <utility>
10 12
11#include <fmt/format.h> 13#include <fmt/format.h>
@@ -24,6 +26,7 @@
24#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
25#include "video_core/shader/node.h" 27#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h"
27 30
28namespace Vulkan { 31namespace Vulkan {
29 32
@@ -93,6 +96,12 @@ struct VertexIndices {
93 std::optional<u32> clip_distances; 96 std::optional<u32> clip_distances;
94}; 97};
95 98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
96spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const Sampler& sampler) {
97 ASSERT(!sampler.IsBuffer()); 106 ASSERT(!sampler.IsBuffer());
98 switch (sampler.GetType()) { 107 switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
266class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
267public: 276public:
268 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 277 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
269 const Specialization& specialization) 278 const Registry& registry, const Specialization& specialization)
270 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, 279 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
271 specialization{specialization} { 280 registry{registry}, specialization{specialization} {
281 if (stage != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
283 }
284
272 AddCapability(spv::Capability::Shader); 285 AddCapability(spv::Capability::Shader);
273 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); 286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
274 AddCapability(spv::Capability::ImageQuery); 287 AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
286 AddExtension("SPV_KHR_variable_pointers"); 299 AddExtension("SPV_KHR_variable_pointers");
287 AddExtension("SPV_KHR_shader_draw_parameters"); 300 AddExtension("SPV_KHR_shader_draw_parameters");
288 301
302 if (!transform_feedback.empty()) {
303 if (device.IsExtTransformFeedbackSupported()) {
304 AddCapability(spv::Capability::TransformFeedback);
305 } else {
306 LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
307 "supported on this device");
308 }
309 }
310
289 if (ir.UsesLayer() || ir.UsesViewportIndex()) { 311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
290 if (ir.UsesViewportIndex()) { 312 if (ir.UsesViewportIndex()) {
291 AddCapability(spv::Capability::MultiViewport); 313 AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
296 } 318 }
297 } 319 }
298 320
299 if (device.IsShaderStorageImageReadWithoutFormatSupported()) { 321 if (device.IsFormatlessImageLoadSupported()) {
300 AddCapability(spv::Capability::StorageImageReadWithoutFormat); 322 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
301 } 323 }
302 324
@@ -318,25 +340,29 @@ public:
318 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 340 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
319 header.common2.threads_per_input_primitive); 341 header.common2.threads_per_input_primitive);
320 break; 342 break;
321 case ShaderType::TesselationEval: 343 case ShaderType::TesselationEval: {
344 const auto& info = registry.GetGraphicsInfo();
322 AddCapability(spv::Capability::Tessellation); 345 AddCapability(spv::Capability::Tessellation);
323 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); 346 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
324 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); 347 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
325 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); 348 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
326 AddExecutionMode(main, specialization.tessellation.clockwise 349 AddExecutionMode(main, info.tessellation_clockwise
327 ? spv::ExecutionMode::VertexOrderCw 350 ? spv::ExecutionMode::VertexOrderCw
328 : spv::ExecutionMode::VertexOrderCcw); 351 : spv::ExecutionMode::VertexOrderCcw);
329 break; 352 break;
330 case ShaderType::Geometry: 353 }
354 case ShaderType::Geometry: {
355 const auto& info = registry.GetGraphicsInfo();
331 AddCapability(spv::Capability::Geometry); 356 AddCapability(spv::Capability::Geometry);
332 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); 357 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
333 AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); 358 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
334 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); 359 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
335 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 360 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
336 header.common4.max_output_vertices); 361 header.common4.max_output_vertices);
337 // TODO(Rodrigo): Where can we get this info from? 362 // TODO(Rodrigo): Where can we get this info from?
338 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); 363 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
339 break; 364 break;
365 }
340 case ShaderType::Fragment: 366 case ShaderType::Fragment:
341 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); 367 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
342 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); 368 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
545 if (stage != ShaderType::Geometry) { 571 if (stage != ShaderType::Geometry) {
546 return; 572 return;
547 } 573 }
548 const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 574 const auto& info = registry.GetGraphicsInfo();
575 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
549 DeclareInputVertexArray(num_input); 576 DeclareInputVertexArray(num_input);
550 DeclareOutputVertex(); 577 DeclareOutputVertex();
551 } 578 }
@@ -742,12 +769,34 @@ private:
742 } 769 }
743 770
744 void DeclareOutputAttributes() { 771 void DeclareOutputAttributes() {
772 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
773 return;
774 }
775
776 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
745 for (const auto index : ir.GetOutputAttributes()) { 777 for (const auto index : ir.GetOutputAttributes()) {
746 if (!IsGenericAttribute(index)) { 778 if (!IsGenericAttribute(index)) {
747 continue; 779 continue;
748 } 780 }
749 const u32 location = GetGenericAttributeLocation(index); 781 DeclareOutputAttribute(index);
750 Id type = t_float4; 782 }
783 }
784
785 void DeclareOutputAttribute(Attribute::Index index) {
786 static constexpr std::string_view swizzle = "xyzw";
787
788 const u32 location = GetGenericAttributeLocation(index);
789 u8 element = 0;
790 while (element < 4) {
791 const std::size_t remainder = 4 - element;
792
793 std::size_t num_components = remainder;
794 const std::optional tfb = GetTransformFeedbackInfo(index, element);
795 if (tfb) {
796 num_components = tfb->components;
797 }
798
799 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
751 Id varying_default = v_varying_default; 800 Id varying_default = v_varying_default;
752 if (IsOutputAttributeArray()) { 801 if (IsOutputAttributeArray()) {
753 const u32 num = GetNumOutputVertices(); 802 const u32 num = GetNumOutputVertices();
@@ -760,13 +809,45 @@ private:
760 } 809 }
761 type = TypePointer(spv::StorageClass::Output, type); 810 type = TypePointer(spv::StorageClass::Output, type);
762 811
812 std::string name = fmt::format("out_attr{}", location);
813 if (num_components < 4 || element > 0) {
814 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
815 }
816
763 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); 817 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
764 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); 818 Name(AddGlobalVariable(id), name);
765 output_attributes.emplace(index, id); 819
820 GenericVaryingDescription description;
821 description.id = id;
822 description.first_element = element;
823 description.is_scalar = num_components == 1;
824 for (u32 i = 0; i < num_components; ++i) {
825 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
826 output_attributes.emplace(offset, description);
827 }
766 interfaces.push_back(id); 828 interfaces.push_back(id);
767 829
768 Decorate(id, spv::Decoration::Location, location); 830 Decorate(id, spv::Decoration::Location, location);
831 if (element > 0) {
832 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
833 }
834 if (tfb && device.IsExtTransformFeedbackSupported()) {
835 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
836 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 }
839
840 element += static_cast<u8>(num_components);
841 }
842 }
843
844 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
845 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
846 const auto it = transform_feedback.find(location);
847 if (it == transform_feedback.end()) {
848 return {};
769 } 849 }
850 return it->second;
770 } 851 }
771 852
772 u32 DeclareConstantBuffers(u32 binding) { 853 u32 DeclareConstantBuffers(u32 binding) {
@@ -898,7 +979,7 @@ private:
898 u32 GetNumInputVertices() const { 979 u32 GetNumInputVertices() const {
899 switch (stage) { 980 switch (stage) {
900 case ShaderType::Geometry: 981 case ShaderType::Geometry:
901 return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 982 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
902 case ShaderType::TesselationControl: 983 case ShaderType::TesselationControl:
903 case ShaderType::TesselationEval: 984 case ShaderType::TesselationEval:
904 return NumInputPatches; 985 return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
1346 } 1427 }
1347 default: 1428 default:
1348 if (IsGenericAttribute(attribute)) { 1429 if (IsGenericAttribute(attribute)) {
1349 const Id composite = output_attributes.at(attribute); 1430 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1350 return {ArrayPass(t_out_float, composite, {element}), Type::Float}; 1431 const GenericVaryingDescription description = output_attributes.at(offset);
1432 const Id composite = description.id;
1433 std::vector<u32> indices;
1434 if (!description.is_scalar) {
1435 indices.push_back(element - description.first_element);
1436 }
1437 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1351 } 1438 }
1352 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", 1439 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1353 static_cast<u32>(attribute)); 1440 static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
1793 } 1880 }
1794 1881
1795 Expression ImageLoad(Operation operation) { 1882 Expression ImageLoad(Operation operation) {
1796 if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { 1883 if (!device.IsFormatlessImageLoadSupported()) {
1797 return {v_float_zero, Type::Float}; 1884 return {v_float_zero, Type::Float};
1798 } 1885 }
1799 1886
@@ -2258,11 +2345,11 @@ private:
2258 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { 2345 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2259 switch (type) { 2346 switch (type) {
2260 case Type::Float: 2347 case Type::Float:
2261 return {nullptr, t_float2, t_float3, t_float4}; 2348 return {t_float, t_float2, t_float3, t_float4};
2262 case Type::Int: 2349 case Type::Int:
2263 return {nullptr, t_int2, t_int3, t_int4}; 2350 return {t_int, t_int2, t_int3, t_int4};
2264 case Type::Uint: 2351 case Type::Uint:
2265 return {nullptr, t_uint2, t_uint3, t_uint4}; 2352 return {t_uint, t_uint2, t_uint3, t_uint4};
2266 default: 2353 default:
2267 UNIMPLEMENTED(); 2354 UNIMPLEMENTED();
2268 return {}; 2355 return {};
@@ -2495,7 +2582,9 @@ private:
2495 const ShaderIR& ir; 2582 const ShaderIR& ir;
2496 const ShaderType stage; 2583 const ShaderType stage;
2497 const Tegra::Shader::Header header; 2584 const Tegra::Shader::Header header;
2585 const Registry& registry;
2498 const Specialization& specialization; 2586 const Specialization& specialization;
2587 std::unordered_map<u8, VaryingTFB> transform_feedback;
2499 2588
2500 const Id t_void = Name(TypeVoid(), "void"); 2589 const Id t_void = Name(TypeVoid(), "void");
2501 2590
@@ -2584,7 +2673,7 @@ private:
2584 Id shared_memory{}; 2673 Id shared_memory{};
2585 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; 2674 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2586 std::map<Attribute::Index, Id> input_attributes; 2675 std::map<Attribute::Index, Id> input_attributes;
2587 std::map<Attribute::Index, Id> output_attributes; 2676 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2588 std::map<u32, Id> constant_buffers; 2677 std::map<u32, Id> constant_buffers;
2589 std::map<GlobalMemoryBase, Id> global_buffers; 2678 std::map<GlobalMemoryBase, Id> global_buffers;
2590 std::map<u32, TexelBuffer> texel_buffers; 2679 std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
2870} 2959}
2871 2960
2872std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 2961std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
2873 ShaderType stage, const Specialization& specialization) { 2962 ShaderType stage, const VideoCommon::Shader::Registry& registry,
2874 return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); 2963 const Specialization& specialization) {
2964 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
2875} 2965}
2876 2966
2877} // namespace Vulkan 2967} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
18#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h" 19#include "video_core/shader/shader_ir.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
91 u32 shared_memory_size{}; 92 u32 shared_memory_size{};
92 93
93 // Graphics specific 94 // Graphics specific
94 Maxwell::PrimitiveTopology primitive_topology{};
95 std::optional<float> point_size{}; 95 std::optional<float> point_size{};
96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
97 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
98
99 // Tessellation specific
100 struct {
101 Maxwell::TessellationPrimitive primitive{};
102 Maxwell::TessellationSpacing spacing{};
103 bool clockwise{};
104 } tessellation;
105}; 98};
106// Old gcc versions don't consider this trivially copyable. 99// Old gcc versions don't consider this trivially copyable.
107// static_assert(std::is_trivially_copyable_v<Specialization>); 100// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
114ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 107ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
115 108
116std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 109std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
117 Tegra::Engines::ShaderType stage, const Specialization& specialization); 110 Tegra::Engines::ShaderType stage,
111 const VideoCommon::Shader::Registry& registry,
112 const Specialization& specialization);
118 113
119} // namespace Vulkan 114} // namespace Vulkan