diff options
| author | 2021-04-01 04:09:09 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:25 -0400 | |
| commit | d0a529683a2e5a693b53c6f24f6816c06f8f7e65 (patch) | |
| tree | 06d9ccc769af3483d9cd51ad508a6d1d541bb5c6 /src | |
| parent | vulkan: Create pipeline layouts in separate threads (diff) | |
| download | yuzu-d0a529683a2e5a693b53c6f24f6816c06f8f7e65.tar.gz yuzu-d0a529683a2e5a693b53c6f24f6816c06f8f7e65.tar.xz yuzu-d0a529683a2e5a693b53c6f24f6816c06f8f7e65.zip | |
vulkan: Serialize pipelines on a separate thread
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 130 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 1 |
2 files changed, 64 insertions, 67 deletions
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 597261964..79cd204c7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -61,6 +61,33 @@ public: | |||
| 61 | 61 | ||
| 62 | ~GenericEnvironment() override = default; | 62 | ~GenericEnvironment() override = default; |
| 63 | 63 | ||
| 64 | u32 TextureBoundBuffer() const final { | ||
| 65 | return texture_bound; | ||
| 66 | } | ||
| 67 | |||
| 68 | u32 LocalMemorySize() const final { | ||
| 69 | return local_memory_size; | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 SharedMemorySize() const final { | ||
| 73 | return shared_memory_size; | ||
| 74 | } | ||
| 75 | |||
| 76 | std::array<u32, 3> WorkgroupSize() const final { | ||
| 77 | return workgroup_size; | ||
| 78 | } | ||
| 79 | |||
| 80 | u64 ReadInstruction(u32 address) final { | ||
| 81 | read_lowest = std::min(read_lowest, address); | ||
| 82 | read_highest = std::max(read_highest, address); | ||
| 83 | |||
| 84 | if (address >= cached_lowest && address < cached_highest) { | ||
| 85 | return code[(address - cached_lowest) / INST_SIZE]; | ||
| 86 | } | ||
| 87 | has_unbound_instructions = true; | ||
| 88 | return gpu_memory->Read<u64>(program_base + address); | ||
| 89 | } | ||
| 90 | |||
| 64 | std::optional<u128> Analyze() { | 91 | std::optional<u128> Analyze() { |
| 65 | const std::optional<u64> size{TryFindSize()}; | 92 | const std::optional<u64> size{TryFindSize()}; |
| 66 | if (!size) { | 93 | if (!size) { |
| @@ -97,26 +124,10 @@ public: | |||
| 97 | return Common::CityHash128(data.get(), size); | 124 | return Common::CityHash128(data.get(), size); |
| 98 | } | 125 | } |
| 99 | 126 | ||
| 100 | u64 ReadInstruction(u32 address) final { | ||
| 101 | read_lowest = std::min(read_lowest, address); | ||
| 102 | read_highest = std::max(read_highest, address); | ||
| 103 | |||
| 104 | if (address >= cached_lowest && address < cached_highest) { | ||
| 105 | return code[(address - cached_lowest) / INST_SIZE]; | ||
| 106 | } | ||
| 107 | has_unbound_instructions = true; | ||
| 108 | return gpu_memory->Read<u64>(program_base + address); | ||
| 109 | } | ||
| 110 | |||
| 111 | void Serialize(std::ofstream& file) const { | 127 | void Serialize(std::ofstream& file) const { |
| 112 | const u64 code_size{static_cast<u64>(ReadSize())}; | 128 | const u64 code_size{static_cast<u64>(CachedSize())}; |
| 113 | const auto data{std::make_unique<char[]>(code_size)}; | ||
| 114 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), code_size); | ||
| 115 | |||
| 116 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | 129 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; |
| 117 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | 130 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; |
| 118 | const u32 local_memory_size{LocalMemorySize()}; | ||
| 119 | const u32 texture_bound{TextureBoundBuffer()}; | ||
| 120 | 131 | ||
| 121 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | 132 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) |
| 122 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | 133 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) |
| @@ -124,10 +135,10 @@ public: | |||
| 124 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | 135 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) |
| 125 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | 136 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) |
| 126 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | 137 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) |
| 127 | .write(reinterpret_cast<const char*>(&read_lowest), sizeof(read_lowest)) | 138 | .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) |
| 128 | .write(reinterpret_cast<const char*>(&read_highest), sizeof(read_highest)) | 139 | .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) |
| 129 | .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) | 140 | .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) |
| 130 | .write(data.get(), code_size); | 141 | .write(reinterpret_cast<const char*>(code.data()), code_size); |
| 131 | for (const auto [key, type] : texture_types) { | 142 | for (const auto [key, type] : texture_types) { |
| 132 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | 143 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) |
| 133 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | 144 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); |
| @@ -137,8 +148,6 @@ public: | |||
| 137 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | 148 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); |
| 138 | } | 149 | } |
| 139 | if (stage == Shader::Stage::Compute) { | 150 | if (stage == Shader::Stage::Compute) { |
| 140 | const std::array<u32, 3> workgroup_size{WorkgroupSize()}; | ||
| 141 | const u32 shared_memory_size{SharedMemorySize()}; | ||
| 142 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | 151 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) |
| 143 | .write(reinterpret_cast<const char*>(&shared_memory_size), | 152 | .write(reinterpret_cast<const char*>(&shared_memory_size), |
| 144 | sizeof(shared_memory_size)); | 153 | sizeof(shared_memory_size)); |
| @@ -220,6 +229,11 @@ protected: | |||
| 220 | std::unordered_map<u64, Shader::TextureType> texture_types; | 229 | std::unordered_map<u64, Shader::TextureType> texture_types; |
| 221 | std::unordered_map<u64, u32> cbuf_values; | 230 | std::unordered_map<u64, u32> cbuf_values; |
| 222 | 231 | ||
| 232 | u32 local_memory_size{}; | ||
| 233 | u32 texture_bound{}; | ||
| 234 | u32 shared_memory_size{}; | ||
| 235 | std::array<u32, 3> workgroup_size{}; | ||
| 236 | |||
| 223 | u32 read_lowest = std::numeric_limits<u32>::max(); | 237 | u32 read_lowest = std::numeric_limits<u32>::max(); |
| 224 | u32 read_highest = 0; | 238 | u32 read_highest = 0; |
| 225 | 239 | ||
| @@ -270,6 +284,10 @@ public: | |||
| 270 | UNREACHABLE_MSG("Invalid program={}", program); | 284 | UNREACHABLE_MSG("Invalid program={}", program); |
| 271 | break; | 285 | break; |
| 272 | } | 286 | } |
| 287 | const u64 local_size{sph.LocalMemorySize()}; | ||
| 288 | ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||
| 289 | local_memory_size = static_cast<u32>(local_size); | ||
| 290 | texture_bound = maxwell3d->regs.tex_cb_index; | ||
| 273 | } | 291 | } |
| 274 | 292 | ||
| 275 | ~GraphicsEnvironment() override = default; | 293 | ~GraphicsEnvironment() override = default; |
| @@ -294,24 +312,6 @@ public: | |||
| 294 | cbuf.address, cbuf.size, cbuf_index, cbuf_offset); | 312 | cbuf.address, cbuf.size, cbuf_index, cbuf_offset); |
| 295 | } | 313 | } |
| 296 | 314 | ||
| 297 | u32 TextureBoundBuffer() const override { | ||
| 298 | return maxwell3d->regs.tex_cb_index; | ||
| 299 | } | ||
| 300 | |||
| 301 | u32 LocalMemorySize() const override { | ||
| 302 | const u64 size{sph.LocalMemorySize()}; | ||
| 303 | ASSERT(size <= std::numeric_limits<u32>::max()); | ||
| 304 | return static_cast<u32>(size); | ||
| 305 | } | ||
| 306 | |||
| 307 | u32 SharedMemorySize() const override { | ||
| 308 | throw Shader::LogicError("Requesting shared memory size in graphics stage"); | ||
| 309 | } | ||
| 310 | |||
| 311 | std::array<u32, 3> WorkgroupSize() const override { | ||
| 312 | throw Shader::LogicError("Requesting workgroup size in a graphics stage"); | ||
| 313 | } | ||
| 314 | |||
| 315 | private: | 315 | private: |
| 316 | Tegra::Engines::Maxwell3D* maxwell3d{}; | 316 | Tegra::Engines::Maxwell3D* maxwell3d{}; |
| 317 | size_t stage_index{}; | 317 | size_t stage_index{}; |
| @@ -325,7 +325,12 @@ public: | |||
| 325 | u32 start_address_) | 325 | u32 start_address_) |
| 326 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ | 326 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ |
| 327 | &kepler_compute_} { | 327 | &kepler_compute_} { |
| 328 | const auto& qmd{kepler_compute->launch_description}; | ||
| 328 | stage = Shader::Stage::Compute; | 329 | stage = Shader::Stage::Compute; |
| 330 | local_memory_size = qmd.local_pos_alloc; | ||
| 331 | texture_bound = kepler_compute->regs.tex_cb_index; | ||
| 332 | shared_memory_size = qmd.shared_alloc; | ||
| 333 | workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 329 | } | 334 | } |
| 330 | 335 | ||
| 331 | ~ComputeEnvironment() override = default; | 336 | ~ComputeEnvironment() override = default; |
| @@ -351,25 +356,6 @@ public: | |||
| 351 | cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); | 356 | cbuf.Address(), cbuf.size, cbuf_index, cbuf_offset); |
| 352 | } | 357 | } |
| 353 | 358 | ||
| 354 | u32 TextureBoundBuffer() const override { | ||
| 355 | return kepler_compute->regs.tex_cb_index; | ||
| 356 | } | ||
| 357 | |||
| 358 | u32 LocalMemorySize() const override { | ||
| 359 | const auto& qmd{kepler_compute->launch_description}; | ||
| 360 | return qmd.local_pos_alloc; | ||
| 361 | } | ||
| 362 | |||
| 363 | u32 SharedMemorySize() const override { | ||
| 364 | const auto& qmd{kepler_compute->launch_description}; | ||
| 365 | return qmd.shared_alloc; | ||
| 366 | } | ||
| 367 | |||
| 368 | std::array<u32, 3> WorkgroupSize() const override { | ||
| 369 | const auto& qmd{kepler_compute->launch_description}; | ||
| 370 | return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 371 | } | ||
| 372 | |||
| 373 | private: | 359 | private: |
| 374 | Tegra::Engines::KeplerCompute* kepler_compute{}; | 360 | Tegra::Engines::KeplerCompute* kepler_compute{}; |
| 375 | }; | 361 | }; |
| @@ -621,7 +607,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | |||
| 621 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, | 607 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, |
| 622 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, | 608 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, |
| 623 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, | 609 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, |
| 624 | workers(11, "yuzu:PipelineBuilder") { | 610 | workers(11, "yuzu:PipelineBuilder"), serialization_thread(1, "yuzu:PipelineSerialization") { |
| 625 | const auto& float_control{device.FloatControlProperties()}; | 611 | const auto& float_control{device.FloatControlProperties()}; |
| 626 | const VkDriverIdKHR driver_id{device.GetDriverID()}; | 612 | const VkDriverIdKHR driver_id{device.GetDriverID()}; |
| 627 | base_profile = Shader::Profile{ | 613 | base_profile = Shader::Profile{ |
| @@ -796,7 +782,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 796 | main_pools.ReleaseContents(); | 782 | main_pools.ReleaseContents(); |
| 797 | 783 | ||
| 798 | std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs; | 784 | std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs; |
| 799 | boost::container::static_vector<GenericEnvironment*, Maxwell::MaxShaderProgram> generic_envs; | ||
| 800 | boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs; | 785 | boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs; |
| 801 | 786 | ||
| 802 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 787 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; |
| @@ -810,13 +795,22 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 810 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | 795 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; |
| 811 | env.SetCachedSize(shader_infos[index]->size_bytes); | 796 | env.SetCachedSize(shader_infos[index]->size_bytes); |
| 812 | 797 | ||
| 813 | generic_envs.push_back(&env); | ||
| 814 | envs.push_back(&env); | 798 | envs.push_back(&env); |
| 815 | } | 799 | } |
| 816 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; | 800 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; |
| 817 | if (!pipeline_cache_filename.empty()) { | 801 | if (pipeline_cache_filename.empty()) { |
| 818 | SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); | 802 | return pipeline; |
| 819 | } | 803 | } |
| 804 | serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { | ||
| 805 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> | ||
| 806 | env_ptrs; | ||
| 807 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 808 | if (key.unique_hashes[index] != u128{}) { | ||
| 809 | env_ptrs.push_back(&envs[index]); | ||
| 810 | } | ||
| 811 | } | ||
| 812 | SerializePipeline(key, env_ptrs, pipeline_cache_filename); | ||
| 813 | }); | ||
| 820 | return pipeline; | 814 | return pipeline; |
| 821 | } | 815 | } |
| 822 | 816 | ||
| @@ -830,8 +824,10 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 830 | main_pools.ReleaseContents(); | 824 | main_pools.ReleaseContents(); |
| 831 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; | 825 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; |
| 832 | if (!pipeline_cache_filename.empty()) { | 826 | if (!pipeline_cache_filename.empty()) { |
| 833 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, | 827 | serialization_thread.QueueWork([this, key, env = std::move(env)] { |
| 834 | pipeline_cache_filename); | 828 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, |
| 829 | pipeline_cache_filename); | ||
| 830 | }); | ||
| 835 | } | 831 | } |
| 836 | return pipeline; | 832 | return pipeline; |
| 837 | } | 833 | } |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 609f00898..343ea1554 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -187,6 +187,7 @@ private: | |||
| 187 | std::string pipeline_cache_filename; | 187 | std::string pipeline_cache_filename; |
| 188 | 188 | ||
| 189 | Common::ThreadWorker workers; | 189 | Common::ThreadWorker workers; |
| 190 | Common::ThreadWorker serialization_thread; | ||
| 190 | }; | 191 | }; |
| 191 | 192 | ||
| 192 | } // namespace Vulkan | 193 | } // namespace Vulkan |