summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: David, 2020-08-17 15:06:11 +1000
committer: GitHub, 2020-08-17 15:06:11 +1000
commit: cbaf1bc711e79a9094f157d3e0327f3a2c707462 (patch)
tree: 084010b04064d06dc17716962df430e2bd213e7e /src
parent: Merge pull request #4515 from lat9nq/pgs-menubar-config (diff)
parent: Remove unneeded newlines, optional Registry in shader params (diff)
download: yuzu-cbaf1bc711e79a9094f157d3e0327f3a2c707462.tar.gz
download: yuzu-cbaf1bc711e79a9094f157d3e0327f3a2c707462.tar.xz
download: yuzu-cbaf1bc711e79a9094f157d3e0327f3a2c707462.zip
Merge pull request #4443 from ameerj/vk-async-shaders
vulkan_renderer: Async shader/graphics pipeline compilation
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h 8
-rw-r--r-- src/video_core/renderer_vulkan/vk_fence_manager.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp 9
-rw-r--r-- src/video_core/renderer_vulkan/vk_graphics_pipeline.h 28
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 37
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.h 31
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 26
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h 10
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.cpp 2
-rw-r--r-- src/video_core/renderer_vulkan/wrapper.h 2
-rw-r--r-- src/video_core/shader/async_shaders.cpp 90
-rw-r--r-- src/video_core/shader/async_shaders.h 39
-rw-r--r-- src/yuzu/configuration/configure_graphics_advanced.ui 2
15 files changed, 210 insertions, 88 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index cb284db77..4af5824cd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
177 } 177 }
178 178
179 if (device.UseAsynchronousShaders()) { 179 if (device.UseAsynchronousShaders()) {
180 // Max worker threads we should allow 180 async_shaders.AllocateWorkers();
181 constexpr u32 MAX_THREADS = 4;
182 // Deduce how many threads we can use
183 const u32 threads_used = std::thread::hardware_concurrency() / 4;
184 // Always allow at least 1 thread regardless of our settings
185 const auto max_worker_count = std::max(1U, threads_used);
186 // Don't use more than MAX_THREADS
187 const auto worker_count = std::min(max_worker_count, MAX_THREADS);
188 async_shaders.AllocateWorkers(worker_count);
189 } 181 }
190} 182}
191 183
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 0c03e4d83..ebcfaa0e3 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -382,6 +382,8 @@ bool VKDevice::Create() {
382 382
383 graphics_queue = logical.GetQueue(graphics_family); 383 graphics_queue = logical.GetQueue(graphics_family);
384 present_queue = logical.GetQueue(present_family); 384 present_queue = logical.GetQueue(present_family);
385
386 use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
385 return true; 387 return true;
386} 388}
387 389
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 529744f2d..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,11 @@ public:
202 return reported_extensions; 202 return reported_extensions;
203 } 203 }
204 204
205 /// Returns true if the setting for async shader compilation is enabled.
206 bool UseAsynchronousShaders() const {
207 return use_asynchronous_shaders;
208 }
209
205 /// Checks if the physical device is suitable. 210 /// Checks if the physical device is suitable.
206 static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); 211 static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
207 212
@@ -252,6 +257,9 @@ private:
252 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 257 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
253 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 258 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
254 259
260 // Asynchronous Graphics Pipeline setting
261 bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
262
255 // Telemetry parameters 263 // Telemetry parameters
256 std::string vendor_name; ///< Device's driver name. 264 std::string vendor_name; ///< Device's driver name.
257 std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. 265 std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index a02be5487..d7f65d435 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,7 +29,7 @@ void InnerFence::Queue() {
29 } 29 }
30 ASSERT(!event); 30 ASSERT(!event);
31 31
32 event = device.GetLogical().CreateEvent(); 32 event = device.GetLogical().CreateNewEvent();
33 ticks = scheduler.Ticks(); 33 ticks = scheduler.Ticks();
34 34
35 scheduler.RequestOutsideRenderPassOperationContext(); 35 scheduler.RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index aaf930b90..2e46c6278 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
78 const GraphicsPipelineCacheKey& key, 78 const GraphicsPipelineCacheKey& key,
79 vk::Span<VkDescriptorSetLayoutBinding> bindings, 79 vk::Span<VkDescriptorSetLayoutBinding> bindings,
80 const SPIRVProgram& program) 80 const SPIRVProgram& program)
81 : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, 81 : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
82 descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, 82 descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
83 descriptor_allocator{descriptor_pool, *descriptor_set_layout}, 83 descriptor_allocator{descriptor_pool, *descriptor_set_layout},
84 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, 84 update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
85 descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( 85 descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
86 program)}, 86 program)},
87 renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( 87 renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
88 key.renderpass_params, 88 pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
89 program)} {}
90 89
91VKGraphicsPipeline::~VKGraphicsPipeline() = default; 90VKGraphicsPipeline::~VKGraphicsPipeline() = default;
92 91
@@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
181 180
182vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, 181vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
183 const SPIRVProgram& program) const { 182 const SPIRVProgram& program) const {
184 const auto& state = fixed_state; 183 const auto& state = cache_key.fixed_state;
185 const auto& viewport_swizzles = state.viewport_swizzles; 184 const auto& viewport_swizzles = state.viewport_swizzles;
186 185
187 FixedPipelineState::DynamicState dynamic; 186 FixedPipelineState::DynamicState dynamic;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index a1d699a6c..58aa35efd 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
19 19
20using Maxwell = Tegra::Engines::Maxwell3D::Regs; 20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22struct GraphicsPipelineCacheKey; 22struct GraphicsPipelineCacheKey {
23 RenderPassParams renderpass_params;
24 u32 padding;
25 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
26 FixedPipelineState fixed_state;
27
28 std::size_t Hash() const noexcept;
29
30 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
31
32 bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
33 return !operator==(rhs);
34 }
35
36 std::size_t Size() const noexcept {
37 return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
38 }
39};
40static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
41static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
42static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
23 43
24class VKDescriptorPool; 44class VKDescriptorPool;
25class VKDevice; 45class VKDevice;
@@ -54,6 +74,10 @@ public:
54 return renderpass; 74 return renderpass;
55 } 75 }
56 76
77 GraphicsPipelineCacheKey GetCacheKey() const {
78 return cache_key;
79 }
80
57private: 81private:
58 vk::DescriptorSetLayout CreateDescriptorSetLayout( 82 vk::DescriptorSetLayout CreateDescriptorSetLayout(
59 vk::Span<VkDescriptorSetLayoutBinding> bindings) const; 83 vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -70,7 +94,7 @@ private:
70 94
71 const VKDevice& device; 95 const VKDevice& device;
72 VKScheduler& scheduler; 96 VKScheduler& scheduler;
73 const FixedPipelineState fixed_state; 97 const GraphicsPipelineCacheKey cache_key;
74 const u64 hash; 98 const u64 hash;
75 99
76 vk::DescriptorSetLayout descriptor_set_layout; 100 vk::DescriptorSetLayout descriptor_set_layout;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 418c62bc4..cfdcdd6ab 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
28#include "video_core/shader/compiler_settings.h" 28#include "video_core/shader/compiler_settings.h"
29#include "video_core/shader/memory_util.h" 29#include "video_core/shader/memory_util.h"
30#include "video_core/shader_cache.h" 30#include "video_core/shader_cache.h"
31#include "video_core/shader_notify.h"
31 32
32namespace Vulkan { 33namespace Vulkan {
33 34
@@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
205 return last_shaders = shaders; 206 return last_shaders = shaders;
206} 207}
207 208
208VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { 209VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
210 const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
209 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 211 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
210 212
211 if (last_graphics_pipeline && last_graphics_key == key) { 213 if (last_graphics_pipeline && last_graphics_key == key) {
212 return *last_graphics_pipeline; 214 return last_graphics_pipeline;
213 } 215 }
214 last_graphics_key = key; 216 last_graphics_key = key;
215 217
218 if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
219 std::unique_lock lock{pipeline_cache};
220 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
221 if (is_cache_miss) {
222 system.GPU().ShaderNotify().MarkSharderBuilding();
223 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
224 const auto [program, bindings] = DecompileShaders(key.fixed_state);
225 async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
226 update_descriptor_queue, renderpass_cache, bindings,
227 program, key);
228 }
229 last_graphics_pipeline = pair->second.get();
230 return last_graphics_pipeline;
231 }
232
216 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); 233 const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
217 auto& entry = pair->second; 234 auto& entry = pair->second;
218 if (is_cache_miss) { 235 if (is_cache_miss) {
236 system.GPU().ShaderNotify().MarkSharderBuilding();
219 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); 237 LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
220 const auto [program, bindings] = DecompileShaders(key); 238 const auto [program, bindings] = DecompileShaders(key.fixed_state);
221 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, 239 entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
222 update_descriptor_queue, renderpass_cache, key, 240 update_descriptor_queue, renderpass_cache, key,
223 bindings, program); 241 bindings, program);
242 system.GPU().ShaderNotify().MarkShaderComplete();
224 } 243 }
225 return *(last_graphics_pipeline = entry.get()); 244 last_graphics_pipeline = entry.get();
245 return last_graphics_pipeline;
226} 246}
227 247
228VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { 248VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
@@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
277 return *entry; 297 return *entry;
278} 298}
279 299
300void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
301 system.GPU().ShaderNotify().MarkShaderComplete();
302 std::unique_lock lock{pipeline_cache};
303 graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
304}
305
280void VKPipelineCache::OnShaderRemoval(Shader* shader) { 306void VKPipelineCache::OnShaderRemoval(Shader* shader) {
281 bool finished = false; 307 bool finished = false;
282 const auto Finish = [&] { 308 const auto Finish = [&] {
@@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
312} 338}
313 339
314std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 340std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
315VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { 341VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
316 const auto& fixed_state = key.fixed_state;
317 auto& memory_manager = system.GPU().MemoryManager(); 342 auto& memory_manager = system.GPU().MemoryManager();
318 const auto& gpu = system.GPU().Maxwell3D(); 343 const auto& gpu = system.GPU().Maxwell3D();
319 344
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0a3fe65fb..c04829e77 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
22#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
23#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 23#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
24#include "video_core/renderer_vulkan/wrapper.h" 24#include "video_core/renderer_vulkan/wrapper.h"
25#include "video_core/shader/async_shaders.h"
25#include "video_core/shader/memory_util.h" 26#include "video_core/shader/memory_util.h"
26#include "video_core/shader/registry.h" 27#include "video_core/shader/registry.h"
27#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
@@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue;
43 44
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 45using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 46
46struct GraphicsPipelineCacheKey {
47 RenderPassParams renderpass_params;
48 u32 padding;
49 std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
50 FixedPipelineState fixed_state;
51
52 std::size_t Hash() const noexcept;
53
54 bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
55
56 bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
57 return !operator==(rhs);
58 }
59
60 std::size_t Size() const noexcept {
61 return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
62 }
63};
64static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
65static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
66static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
67
68struct ComputePipelineCacheKey { 47struct ComputePipelineCacheKey {
69 GPUVAddr shader; 48 GPUVAddr shader;
70 u32 shared_memory_size; 49 u32 shared_memory_size;
@@ -152,16 +131,19 @@ public:
152 131
153 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); 132 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
154 133
155 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); 134 VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
135 VideoCommon::Shader::AsyncShaders& async_shaders);
156 136
157 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 137 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
158 138
139 void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
140
159protected: 141protected:
160 void OnShaderRemoval(Shader* shader) final; 142 void OnShaderRemoval(Shader* shader) final;
161 143
162private: 144private:
163 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 145 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
164 const GraphicsPipelineCacheKey& key); 146 const FixedPipelineState& fixed_state);
165 147
166 Core::System& system; 148 Core::System& system;
167 const VKDevice& device; 149 const VKDevice& device;
@@ -178,6 +160,7 @@ private:
178 GraphicsPipelineCacheKey last_graphics_key; 160 GraphicsPipelineCacheKey last_graphics_key;
179 VKGraphicsPipeline* last_graphics_pipeline = nullptr; 161 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
180 162
163 std::mutex pipeline_cache;
181 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> 164 std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
182 graphics_cache; 165 graphics_cache;
183 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; 166 std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7500e8244..936f76195 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
14#include "common/assert.h" 14#include "common/assert.h"
15#include "common/logging/log.h" 15#include "common/logging/log.h"
16#include "common/microprofile.h" 16#include "common/microprofile.h"
17#include "common/scope_exit.h"
17#include "core/core.h" 18#include "core/core.h"
18#include "core/settings.h" 19#include "core/settings.h"
19#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
@@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
400 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 401 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
401 sampler_cache(device), 402 sampler_cache(device),
402 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), 403 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
403 query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { 404 query_cache(system, *this, device, scheduler),
405 wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
404 scheduler.SetQueryCache(query_cache); 406 scheduler.SetQueryCache(query_cache);
407 if (device.UseAsynchronousShaders()) {
408 async_shaders.AllocateWorkers();
409 }
405} 410}
406 411
407RasterizerVulkan::~RasterizerVulkan() = default; 412RasterizerVulkan::~RasterizerVulkan() = default;
@@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
413 418
414 query_cache.UpdateCounters(); 419 query_cache.UpdateCounters();
415 420
421 SCOPE_EXIT({ system.GPU().TickWork(); });
422
416 const auto& gpu = system.GPU().Maxwell3D(); 423 const auto& gpu = system.GPU().Maxwell3D();
417 GraphicsPipelineCacheKey key; 424 GraphicsPipelineCacheKey key;
418 key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); 425 key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
439 key.renderpass_params = GetRenderPassParams(texceptions); 446 key.renderpass_params = GetRenderPassParams(texceptions);
440 key.padding = 0; 447 key.padding = 0;
441 448
442 auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); 449 auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
443 scheduler.BindGraphicsPipeline(pipeline.GetHandle()); 450 if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
451 // Async graphics pipeline was not ready.
452 return;
453 }
454
455 scheduler.BindGraphicsPipeline(pipeline->GetHandle());
444 456
445 const auto renderpass = pipeline.GetRenderPass(); 457 const auto renderpass = pipeline->GetRenderPass();
446 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); 458 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
447 scheduler.RequestRenderpass(renderpass, framebuffer, render_area); 459 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
448 460
@@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
452 464
453 BeginTransformFeedback(); 465 BeginTransformFeedback();
454 466
455 const auto pipeline_layout = pipeline.GetLayout(); 467 const auto pipeline_layout = pipeline->GetLayout();
456 const auto descriptor_set = pipeline.CommitDescriptorSet(); 468 const auto descriptor_set = pipeline->CommitDescriptorSet();
457 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { 469 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
458 if (descriptor_set) { 470 if (descriptor_set) {
459 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 471 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
463 }); 475 });
464 476
465 EndTransformFeedback(); 477 EndTransformFeedback();
466
467 system.GPU().TickWork();
468} 478}
469 479
470void RasterizerVulkan::Clear() { 480void RasterizerVulkan::Clear() {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 923178b0b..f640ba649 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -32,6 +32,7 @@
32#include "video_core/renderer_vulkan/vk_texture_cache.h" 32#include "video_core/renderer_vulkan/vk_texture_cache.h"
33#include "video_core/renderer_vulkan/vk_update_descriptor.h" 33#include "video_core/renderer_vulkan/vk_update_descriptor.h"
34#include "video_core/renderer_vulkan/wrapper.h" 34#include "video_core/renderer_vulkan/wrapper.h"
35#include "video_core/shader/async_shaders.h"
35 36
36namespace Core { 37namespace Core {
37class System; 38class System;
@@ -136,6 +137,14 @@ public:
136 u32 pixel_stride) override; 137 u32 pixel_stride) override;
137 void SetupDirtyFlags() override; 138 void SetupDirtyFlags() override;
138 139
140 VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
141 return async_shaders;
142 }
143
144 const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
145 return async_shaders;
146 }
147
139 /// Maximum supported size that a constbuffer can have in bytes. 148 /// Maximum supported size that a constbuffer can have in bytes.
140 static constexpr std::size_t MaxConstbufferSize = 0x10000; 149 static constexpr std::size_t MaxConstbufferSize = 0x10000;
141 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, 150 static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@@ -297,6 +306,7 @@ private:
297 vk::Buffer default_buffer; 306 vk::Buffer default_buffer;
298 VKMemoryCommit default_buffer_commit; 307 VKMemoryCommit default_buffer_commit;
299 vk::Event wfi_event; 308 vk::Event wfi_event;
309 VideoCommon::Shader::AsyncShaders async_shaders;
300 310
301 std::array<View, Maxwell::NumRenderTargets> color_attachments; 311 std::array<View, Maxwell::NumRenderTargets> color_attachments;
302 View zeta_attachment; 312 View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 14cac38ea..c43d60adf 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
644 return ShaderModule(object, handle, *dld); 644 return ShaderModule(object, handle, *dld);
645} 645}
646 646
647Event Device::CreateEvent() const { 647Event Device::CreateNewEvent() const {
648 static constexpr VkEventCreateInfo ci{ 648 static constexpr VkEventCreateInfo ci{
649 .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, 649 .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
650 .pNext = nullptr, 650 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 31885ef42..b9d3fedc1 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -721,7 +721,7 @@ public:
721 721
722 ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; 722 ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
723 723
724 Event CreateEvent() const; 724 Event CreateNewEvent() const;
725 725
726 SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; 726 SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
727 727
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 3f6d5a75b..f815584f7 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -2,7 +2,6 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <chrono>
6#include <condition_variable> 5#include <condition_variable>
7#include <mutex> 6#include <mutex>
8#include <thread> 7#include <thread>
@@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
20 KillWorkers(); 19 KillWorkers();
21} 20}
22 21
23void AsyncShaders::AllocateWorkers(std::size_t num_workers) { 22void AsyncShaders::AllocateWorkers() {
24 // If we're already have workers queued or don't want to queue workers, ignore 23 // Max worker threads we should allow
25 if (num_workers == worker_threads.size() || num_workers == 0) { 24 constexpr u32 MAX_THREADS = 4;
25 // Deduce how many threads we can use
26 const u32 threads_used = std::thread::hardware_concurrency() / 4;
27 // Always allow at least 1 thread regardless of our settings
28 const auto max_worker_count = std::max(1U, threads_used);
29 // Don't use more than MAX_THREADS
30 const auto num_workers = std::min(max_worker_count, MAX_THREADS);
31
32 // If we already have workers queued, ignore
33 if (num_workers == worker_threads.size()) {
26 return; 34 return;
27 } 35 }
28 36
@@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
111 VideoCommon::Shader::CompilerSettings compiler_settings, 119 VideoCommon::Shader::CompilerSettings compiler_settings,
112 const VideoCommon::Shader::Registry& registry, 120 const VideoCommon::Shader::Registry& registry,
113 VAddr cpu_addr) { 121 VAddr cpu_addr) {
114 WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM 122 WorkerParams params{
115 : AsyncShaders::Backend::OpenGL, 123 .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
116 device, 124 .device = &device,
117 shader_type, 125 .shader_type = shader_type,
118 uid, 126 .uid = uid,
119 std::move(code), 127 .code = std::move(code),
120 std::move(code_b), 128 .code_b = std::move(code_b),
121 main_offset, 129 .main_offset = main_offset,
122 compiler_settings, 130 .compiler_settings = compiler_settings,
123 registry, 131 .registry = registry,
124 cpu_addr}; 132 .cpu_address = cpu_addr,
133 };
125 std::unique_lock lock(queue_mutex); 134 std::unique_lock lock(queue_mutex);
126 pending_queue.push_back(std::move(params)); 135 pending_queue.push(std::move(params));
136 cv.notify_one();
137}
138
139void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
140 const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
141 Vulkan::VKDescriptorPool& descriptor_pool,
142 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
143 Vulkan::VKRenderPassCache& renderpass_cache,
144 std::vector<VkDescriptorSetLayoutBinding> bindings,
145 Vulkan::SPIRVProgram program,
146 Vulkan::GraphicsPipelineCacheKey key) {
147 WorkerParams params{
148 .backend = Backend::Vulkan,
149 .pp_cache = pp_cache,
150 .vk_device = &device,
151 .scheduler = &scheduler,
152 .descriptor_pool = &descriptor_pool,
153 .update_descriptor_queue = &update_descriptor_queue,
154 .renderpass_cache = &renderpass_cache,
155 .bindings = bindings,
156 .program = program,
157 .key = key,
158 };
159
160 std::unique_lock lock(queue_mutex);
161 pending_queue.push(std::move(params));
127 cv.notify_one(); 162 cv.notify_one();
128} 163}
129 164
130void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { 165void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
131 using namespace std::chrono_literals;
132 while (!is_thread_exiting.load(std::memory_order_relaxed)) { 166 while (!is_thread_exiting.load(std::memory_order_relaxed)) {
133 std::unique_lock lock{queue_mutex}; 167 std::unique_lock lock{queue_mutex};
134 cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); 168 cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
144 if (pending_queue.empty()) { 178 if (pending_queue.empty()) {
145 continue; 179 continue;
146 } 180 }
181
147 // Pull work from queue 182 // Pull work from queue
148 WorkerParams work = std::move(pending_queue.front()); 183 WorkerParams work = std::move(pending_queue.front());
149 pending_queue.pop_front(); 184 pending_queue.pop();
150
151 lock.unlock(); 185 lock.unlock();
152 186
153 if (work.backend == AsyncShaders::Backend::OpenGL || 187 if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
154 work.backend == AsyncShaders::Backend::GLASM) { 188 const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
155 const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
156 const auto scope = context->Acquire(); 189 const auto scope = context->Acquire();
157 auto program = 190 auto program =
158 OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); 191 OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
159 Result result{}; 192 Result result{};
160 result.backend = work.backend; 193 result.backend = work.backend;
161 result.cpu_address = work.cpu_address; 194 result.cpu_address = work.cpu_address;
@@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
164 result.code_b = std::move(work.code_b); 197 result.code_b = std::move(work.code_b);
165 result.shader_type = work.shader_type; 198 result.shader_type = work.shader_type;
166 199
167 if (work.backend == AsyncShaders::Backend::OpenGL) { 200 if (work.backend == Backend::OpenGL) {
168 result.program.opengl = std::move(program->source_program); 201 result.program.opengl = std::move(program->source_program);
169 } else if (work.backend == AsyncShaders::Backend::GLASM) { 202 } else if (work.backend == Backend::GLASM) {
170 result.program.glasm = std::move(program->assembly_program); 203 result.program.glasm = std::move(program->assembly_program);
171 } 204 }
172 205
@@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
174 std::unique_lock complete_lock(completed_mutex); 207 std::unique_lock complete_lock(completed_mutex);
175 finished_work.push_back(std::move(result)); 208 finished_work.push_back(std::move(result));
176 } 209 }
210 } else if (work.backend == Backend::Vulkan) {
211 auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
212 *work.vk_device, *work.scheduler, *work.descriptor_pool,
213 *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
214 work.program);
215
216 work.pp_cache->EmplacePipeline(std::move(pipeline));
177 } 217 }
178 } 218 }
179} 219}
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index 2f5ee94ad..d5ae814d5 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -14,6 +14,10 @@
14#include "video_core/renderer_opengl/gl_device.h" 14#include "video_core/renderer_opengl/gl_device.h"
15#include "video_core/renderer_opengl/gl_resource_manager.h" 15#include "video_core/renderer_opengl/gl_resource_manager.h"
16#include "video_core/renderer_opengl/gl_shader_decompiler.h" 16#include "video_core/renderer_opengl/gl_shader_decompiler.h"
17#include "video_core/renderer_vulkan/vk_device.h"
18#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
19#include "video_core/renderer_vulkan/vk_scheduler.h"
20#include "video_core/renderer_vulkan/vk_update_descriptor.h"
17 21
18namespace Core::Frontend { 22namespace Core::Frontend {
19class EmuWindow; 23class EmuWindow;
@@ -24,6 +28,10 @@ namespace Tegra {
24class GPU; 28class GPU;
25} 29}
26 30
31namespace Vulkan {
32class VKPipelineCache;
33}
34
27namespace VideoCommon::Shader { 35namespace VideoCommon::Shader {
28 36
29class AsyncShaders { 37class AsyncShaders {
@@ -31,6 +39,7 @@ public:
31 enum class Backend { 39 enum class Backend {
32 OpenGL, 40 OpenGL,
33 GLASM, 41 GLASM,
42 Vulkan,
34 }; 43 };
35 44
36 struct ResultPrograms { 45 struct ResultPrograms {
@@ -52,7 +61,7 @@ public:
52 ~AsyncShaders(); 61 ~AsyncShaders();
53 62
54 /// Start up shader worker threads 63 /// Start up shader worker threads
55 void AllocateWorkers(std::size_t num_workers); 64 void AllocateWorkers();
56 65
57 /// Clear the shader queue and kill all worker threads 66 /// Clear the shader queue and kill all worker threads
58 void FreeWorkers(); 67 void FreeWorkers();
@@ -76,6 +85,14 @@ public:
76 VideoCommon::Shader::CompilerSettings compiler_settings, 85 VideoCommon::Shader::CompilerSettings compiler_settings,
77 const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); 86 const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
78 87
88 void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
89 Vulkan::VKScheduler& scheduler,
90 Vulkan::VKDescriptorPool& descriptor_pool,
91 Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
92 Vulkan::VKRenderPassCache& renderpass_cache,
93 std::vector<VkDescriptorSetLayoutBinding> bindings,
94 Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
95
79private: 96private:
80 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); 97 void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
81 98
@@ -83,16 +100,28 @@ private:
83 bool HasWorkQueued(); 100 bool HasWorkQueued();
84 101
85 struct WorkerParams { 102 struct WorkerParams {
86 AsyncShaders::Backend backend; 103 Backend backend;
87 OpenGL::Device device; 104 // For OGL
105 const OpenGL::Device* device;
88 Tegra::Engines::ShaderType shader_type; 106 Tegra::Engines::ShaderType shader_type;
89 u64 uid; 107 u64 uid;
90 std::vector<u64> code; 108 std::vector<u64> code;
91 std::vector<u64> code_b; 109 std::vector<u64> code_b;
92 u32 main_offset; 110 u32 main_offset;
93 VideoCommon::Shader::CompilerSettings compiler_settings; 111 VideoCommon::Shader::CompilerSettings compiler_settings;
94 VideoCommon::Shader::Registry registry; 112 std::optional<VideoCommon::Shader::Registry> registry;
95 VAddr cpu_address; 113 VAddr cpu_address;
114
115 // For Vulkan
116 Vulkan::VKPipelineCache* pp_cache;
117 const Vulkan::VKDevice* vk_device;
118 Vulkan::VKScheduler* scheduler;
119 Vulkan::VKDescriptorPool* descriptor_pool;
120 Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
121 Vulkan::VKRenderPassCache* renderpass_cache;
122 std::vector<VkDescriptorSetLayoutBinding> bindings;
123 Vulkan::SPIRVProgram program;
124 Vulkan::GraphicsPipelineCacheKey key;
96 }; 125 };
97 126
98 std::condition_variable cv; 127 std::condition_variable cv;
@@ -101,7 +130,7 @@ private:
101 std::atomic<bool> is_thread_exiting{}; 130 std::atomic<bool> is_thread_exiting{};
102 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; 131 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
103 std::vector<std::thread> worker_threads; 132 std::vector<std::thread> worker_threads;
104 std::deque<WorkerParams> pending_queue; 133 std::queue<WorkerParams> pending_queue;
105 std::vector<AsyncShaders::Result> finished_work; 134 std::vector<AsyncShaders::Result> finished_work;
106 Core::Frontend::EmuWindow& emu_window; 135 Core::Frontend::EmuWindow& emu_window;
107}; 136};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index a793c803d..846a30586 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -92,7 +92,7 @@
92 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> 92 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
93 </property> 93 </property>
94 <property name="text"> 94 <property name="text">
95 <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> 95 <string>Use asynchronous shader building (experimental)</string>
96 </property> 96 </property>
97 </widget> 97 </widget>
98 </item> 98 </item>