| author | 2020-01-06 21:55:06 -0300 |
|---|---|
| committer | 2020-01-06 22:02:26 -0300 |
| commit | 6888d776fffb3d5e105eddc271a2d6231abf0922 (patch) |
| tree | 705f1e5214b0725817fb1504e738f95e37d767e9 /src |
| parent | vk_graphics_pipeline: Initial implementation (diff) |
| download | yuzu-6888d776fffb3d5e105eddc271a2d6231abf0922.tar.gz, yuzu-6888d776fffb3d5e105eddc271a2d6231abf0922.tar.xz, yuzu-6888d776fffb3d5e105eddc271a2d6231abf0922.zip |
vk_pipeline_cache: Initial implementation
Given a pipeline key, this cache returns a pipeline abstraction (for
graphics or compute).
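
For orientation, here is a minimal sketch of how a renderer might drive this cache. `GetShaders()`, `GetGraphicsPipeline()` and the key fields match the diff below; the call site and the `GetCurrentFixedState()` / `GetRenderPassParams()` helpers are assumptions, not part of this commit:

```cpp
// Hypothetical draw-time usage (simplified; the real plumbing lives in
// RasterizerVulkan, which is outside this commit):
const auto shaders = pipeline_cache.GetShaders();

GraphicsPipelineCacheKey key;
key.fixed_state = GetCurrentFixedState();      // assumed helper
key.renderpass_params = GetRenderPassParams(); // assumed helper
std::transform(shaders.begin(), shaders.end(), key.shaders.begin(),
               [](const Shader& shader) { return shader ? shader->GetGpuAddr() : GPUVAddr{}; });

// Compiles the pipeline on the first use of this key; afterwards it is a cache hit.
VKGraphicsPipeline& pipeline = pipeline_cache.GetGraphicsPipeline(key);
```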
Diffstat (limited to '')

| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 352 |
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 109 |

2 files changed, 460 insertions(+), 1 deletion(-)
```diff
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9bc027cbf..48e23d4cd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -2,16 +2,368 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstddef>
+#include <memory>
 #include <vector>
 
+#include "common/microprofile.h"
+#include "core/core.h"
+#include "core/memory.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/maxwell_to_vk.h"
+#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
+#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_rasterizer.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/shader/compiler_settings.h"
 
 namespace Vulkan {
 
+MICROPROFILE_DECLARE(Vulkan_PipelineCache);
+
+using Tegra::Engines::ShaderType;
+
+namespace {
+
+constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
+    VideoCommon::Shader::CompileDepth::FullDecompile};
+
+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+/// Gets if the current instruction offset is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    const std::size_t start_offset = is_compute ? 0 : 10;
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & mask) == self_jumping_branch) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        ++offset;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
+        std::fill(program_code.begin(), program_code.end(), 0);
+        return program_code;
+    });
+    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
+                                   program_code.size() * sizeof(u64));
+    program_code.resize(CalculateProgramSize(program_code, is_compute));
+    return program_code;
+}
+
+constexpr std::size_t GetStageFromProgram(std::size_t program) {
+    return program == 0 ? 0 : program - 1;
+}
+
+constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
+    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+}
+
+ShaderType GetShaderType(Maxwell::ShaderProgram program) {
+    switch (program) {
+    case Maxwell::ShaderProgram::VertexB:
+        return ShaderType::Vertex;
+    case Maxwell::ShaderProgram::TesselationControl:
+        return ShaderType::TesselationControl;
+    case Maxwell::ShaderProgram::TesselationEval:
+        return ShaderType::TesselationEval;
+    case Maxwell::ShaderProgram::Geometry:
+        return ShaderType::Geometry;
+    case Maxwell::ShaderProgram::Fragment:
+        return ShaderType::Fragment;
+    default:
+        UNIMPLEMENTED_MSG("program={}", static_cast<u32>(program));
+        return ShaderType::Vertex;
+    }
+}
+
+u32 FillDescriptorLayout(const ShaderEntries& entries,
+                         std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                         Maxwell::ShaderProgram program_type, u32 base_binding) {
+    const ShaderType stage = GetStageFromProgram(program_type);
+    const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage);
+
+    u32 binding = base_binding;
+    const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) {
+        for (std::size_t i = 0; i < num_entries; ++i) {
+            bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr);
+        }
+    };
+    AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
+    AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
+    AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
+    AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
+    AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
+    return binding;
+}
+
+} // Anonymous namespace
+
+CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
+                           GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr,
+                           ProgramCode program_code, u32 main_offset)
+    : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
+      program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)},
+      shader_ir{this->program_code, main_offset, compiler_settings, locker},
+      entries{GenerateShaderEntries(shader_ir)} {}
+
+CachedShader::~CachedShader() = default;
+
+Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
+    Core::System& system, Tegra::Engines::ShaderType stage) {
+    if (stage == Tegra::Engines::ShaderType::Compute) {
+        return system.GPU().KeplerCompute();
+    } else {
+        return system.GPU().Maxwell3D();
+    }
+}
+
+VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                                 const VKDevice& device, VKScheduler& scheduler,
+                                 VKDescriptorPool& descriptor_pool,
+                                 VKUpdateDescriptorQueue& update_descriptor_queue)
+    : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
+      descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
+      renderpass_cache(device) {}
+
+VKPipelineCache::~VKPipelineCache() = default;
+
+std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+    const auto& gpu = system.GPU().Maxwell3D();
+    auto& dirty = system.GPU().Maxwell3D().dirty.shaders;
+    if (!dirty) {
+        return last_shaders;
+    }
+    dirty = false;
+
+    std::array<Shader, Maxwell::MaxShaderProgram> shaders;
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        const auto& shader_config = gpu.regs.shader_config[index];
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
+
+        // Skip stages that are not enabled
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+
+        auto& memory_manager{system.GPU().MemoryManager()};
+        const GPUVAddr program_addr{GetShaderAddress(system, program)};
+        const auto host_ptr{memory_manager.GetPointer(program_addr)};
+        auto shader = TryGet(host_ptr);
+        if (!shader) {
+            // No shader found - create a new one
+            constexpr u32 stage_offset = 10;
+            const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
+            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+
+            const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+            ASSERT(cpu_addr);
+
+            shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
+                                                    host_ptr, std::move(code), stage_offset);
+            Register(shader);
+        }
+        shaders[index] = std::move(shader);
+    }
+    return last_shaders = shaders;
+}
+
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    if (last_graphics_pipeline && last_graphics_key == key) {
+        return *last_graphics_pipeline;
+    }
+    last_graphics_key = key;
+
+    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+    auto& entry = pair->second;
+    if (is_cache_miss) {
+        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+        const auto [program, bindings] = DecompileShaders(key);
+        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
+                                                     update_descriptor_queue, renderpass_cache, key,
+                                                     bindings, program);
+    }
+    return *(last_graphics_pipeline = entry.get());
+}
+
+VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
+    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
+
+    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
+    auto& entry = pair->second;
+    if (!is_cache_miss) {
+        return *entry;
+    }
+    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto program_addr = key.shader;
+    const auto host_ptr = memory_manager.GetPointer(program_addr);
+
+    auto shader = TryGet(host_ptr);
+    if (!shader) {
+        // No shader found - create a new one
+        const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+        ASSERT(cpu_addr);
+
+        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+        constexpr u32 kernel_main_offset = 0;
+        shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
+                                                program_addr, *cpu_addr, host_ptr, std::move(code),
+                                                kernel_main_offset);
+        Register(shader);
+    }
+
+    Specialization specialization;
+    specialization.workgroup_size = key.workgroup_size;
+    specialization.shared_memory_size = key.shared_memory_size;
+
+    const SPIRVShader spirv_shader{
+        Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
+        shader->GetEntries()};
+    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
+                                                update_descriptor_queue, spirv_shader);
+    return *entry;
+}
+
+void VKPipelineCache::Unregister(const Shader& shader) {
+    bool finished = false;
+    const auto Finish = [&] {
+        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
+        // flush.
+        if (finished) {
+            return;
+        }
+        finished = true;
+        scheduler.Finish();
+    };
+
+    const GPUVAddr invalidated_addr = shader->GetGpuAddr();
+    for (auto it = graphics_cache.begin(); it != graphics_cache.end();) {
+        auto& entry = it->first;
+        if (std::find(entry.shaders.begin(), entry.shaders.end(), invalidated_addr) ==
+            entry.shaders.end()) {
+            ++it;
+            continue;
+        }
+        Finish();
+        it = graphics_cache.erase(it);
+    }
+    for (auto it = compute_cache.begin(); it != compute_cache.end();) {
+        auto& entry = it->first;
+        if (entry.shader != invalidated_addr) {
+            ++it;
+            continue;
+        }
+        Finish();
+        it = compute_cache.erase(it);
+    }
+
+    RasterizerCache::Unregister(shader);
+}
+
+std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>>
+VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
+    const auto& fixed_state = key.fixed_state;
+    auto& memory_manager = system.GPU().MemoryManager();
+    const auto& gpu = system.GPU().Maxwell3D();
+
+    Specialization specialization;
+    specialization.primitive_topology = fixed_state.input_assembly.topology;
+    if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
+        ASSERT(fixed_state.input_assembly.point_size != 0.0f);
+        specialization.point_size = fixed_state.input_assembly.point_size;
+    }
+    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
+        specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
+    }
+    specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
+    specialization.tessellation.primitive = fixed_state.tessellation.primitive;
+    specialization.tessellation.spacing = fixed_state.tessellation.spacing;
+    specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
+    for (const auto& rt : key.renderpass_params.color_attachments) {
+        specialization.enabled_rendertargets.set(rt.index);
+    }
+
+    SPIRVProgram program;
+    std::vector<vk::DescriptorSetLayoutBinding> bindings;
+
+    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
+
+        // Skip stages that are not enabled
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
+            continue;
+        }
+
+        const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        const auto shader = TryGet(host_ptr);
+        ASSERT(shader);
+
+        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
+        const auto program_type = GetShaderType(program_enum);
+        const auto& entries = shader->GetEntries();
+        program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
+                          entries};
+
+        if (program_enum == Maxwell::ShaderProgram::VertexA) {
+            // VertexB was combined with VertexA, so we skip the VertexB iteration
+            ++index;
+        }
+
+        const u32 old_binding = specialization.base_binding;
+        specialization.base_binding =
+            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
+        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
+    }
+    return {std::move(program), std::move(bindings)};
+}
+
 void FillDescriptorUpdateTemplateEntries(
     const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
     std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
```
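
The program-size scan is the most self-contained piece of the new file. Here is a standalone restatement with a toy program to show the scan in action; the `self_jumping_branch` and `mask` constants are copied from the diff, while the toy instruction words in `main` are illustrative stand-ins, not real SASS:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

using u64 = std::uint64_t;

// Sched control words occupy every 4th instruction slot, so they must be
// skipped while searching for the terminator.
constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    constexpr std::size_t SchedPeriod = 4;
    return ((offset - main_offset) % SchedPeriod) == 0;
}

// Scan until the self-jumping BRA (all Nvidia shaders end with one) or a
// zeroed instruction; graphics programs skip a 10-word header, compute
// programs start at word 0.
std::size_t CalculateProgramSize(const std::vector<u64>& program, bool is_compute) {
    const std::size_t start_offset = is_compute ? 0 : 10;
    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const u64 instruction = program[offset];
        if (!IsSchedInstruction(offset, start_offset)) {
            if ((instruction & mask) == self_jumping_branch || instruction == 0) {
                break;
            }
        }
        ++offset;
    }
    // The terminating instruction itself is counted in the program size.
    return std::min(offset + 1, program.size());
}

int main() {
    // Toy graphics stream: 10-word header, a sched word, two fake
    // instructions, the BRA terminator, then stale memory garbage.
    std::vector<u64> code(10, 0);
    code.insert(code.end(), {0x0011223344556677ULL,  // sched slot (offset 10)
                             0x5C98078000870001ULL,  // fake instruction
                             0x5C98078000970002ULL,  // fake instruction
                             0xE2400FFFFF87000FULL,  // BRA to self (bit 23 is masked off)
                             0xDEADBEEFDEADBEEFULL});
    std::printf("program size = %zu words\n", CalculateProgramSize(code, false)); // prints 14
}
```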
```diff
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e49ed135d..8678fc9c3 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,23 +6,49 @@
 
 #include <array>
 #include <cstddef>
+#include <memory>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <boost/functional/hash.hpp>
 
 #include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
+#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
+#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/const_buffer_locker.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/surface.h"
+
+namespace Core {
+class System;
+}
 
 namespace Vulkan {
 
+class RasterizerVulkan;
+class VKComputePipeline;
+class VKDescriptorPool;
 class VKDevice;
+class VKFence;
+class VKScheduler;
+class VKUpdateDescriptorQueue;
 
+class CachedShader;
+using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
+using ProgramCode = std::vector<u64>;
+
 struct GraphicsPipelineCacheKey {
     FixedPipelineState fixed_state;
     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
@@ -84,7 +110,88 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
 
 namespace Vulkan {
 
-class VKDevice;
+class CachedShader final : public RasterizerCacheObject {
+public:
+    explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+                          VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset);
+    ~CachedShader();
+
+    GPUVAddr GetGpuAddr() const {
+        return gpu_addr;
+    }
+
+    VAddr GetCpuAddr() const override {
+        return cpu_addr;
+    }
+
+    std::size_t GetSizeInBytes() const override {
+        return program_code.size() * sizeof(u64);
+    }
+
+    VideoCommon::Shader::ShaderIR& GetIR() {
+        return shader_ir;
+    }
+
+    const VideoCommon::Shader::ShaderIR& GetIR() const {
+        return shader_ir;
+    }
+
+    const ShaderEntries& GetEntries() const {
+        return entries;
+    }
+
+private:
+    static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
+                                                                 Tegra::Engines::ShaderType stage);
+
+    GPUVAddr gpu_addr{};
+    VAddr cpu_addr{};
+    ProgramCode program_code;
+    VideoCommon::Shader::ConstBufferLocker locker;
+    VideoCommon::Shader::ShaderIR shader_ir;
+    ShaderEntries entries;
+};
+
+class VKPipelineCache final : public RasterizerCache<Shader> {
+public:
+    explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
+                             const VKDevice& device, VKScheduler& scheduler,
+                             VKDescriptorPool& descriptor_pool,
+                             VKUpdateDescriptorQueue& update_descriptor_queue);
+    ~VKPipelineCache();
+
+    std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
+
+    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+
+    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
+
+protected:
+    void Unregister(const Shader& shader) override;
+
+    void FlushObjectInner(const Shader& object) override {}
+
+private:
+    std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders(
+        const GraphicsPipelineCacheKey& key);
+
+    Core::System& system;
+    const VKDevice& device;
+    VKScheduler& scheduler;
+    VKDescriptorPool& descriptor_pool;
+    VKUpdateDescriptorQueue& update_descriptor_queue;
+
+    VKRenderPassCache renderpass_cache;
+
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
+
+    GraphicsPipelineCacheKey last_graphics_key;
+    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+
+    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
+        graphics_cache;
+    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
+};
 
 void FillDescriptorUpdateTemplateEntries(
     const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset,
```
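
The lookup idiom in `GetGraphicsPipeline` — a one-entry "last key" fast path in front of an `unordered_map` populated via `try_emplace` — generalizes beyond this commit. A minimal self-contained sketch of the same pattern, using stand-in `Key`/`Pipeline` types rather than the yuzu classes:

```cpp
#include <cstdio>
#include <memory>
#include <unordered_map>

// Stand-ins for GraphicsPipelineCacheKey / VKGraphicsPipeline.
struct Key {
    unsigned hash_value;
    bool operator==(const Key& rhs) const { return hash_value == rhs.hash_value; }
};
struct KeyHash {
    std::size_t operator()(const Key& k) const { return k.hash_value; }
};
struct Pipeline {
    explicit Pipeline(Key k) { std::printf("compile 0x%08X\n", k.hash_value); }
};

class PipelineCache {
public:
    Pipeline& Get(const Key& key) {
        // Fast path: consecutive draws usually reuse the previous pipeline,
        // so one comparison avoids hashing the (potentially large) key.
        if (last_pipeline && last_key == key) {
            return *last_pipeline;
        }
        last_key = key;

        // try_emplace inserts a default-constructed (null) unique_ptr on a
        // miss without building the pipeline first; compilation only happens
        // when the key was genuinely absent.
        const auto [it, is_miss] = cache.try_emplace(key);
        auto& entry = it->second;
        if (is_miss) {
            entry = std::make_unique<Pipeline>(key);
        }
        return *(last_pipeline = entry.get());
    }

private:
    Key last_key{};
    Pipeline* last_pipeline = nullptr;
    std::unordered_map<Key, std::unique_ptr<Pipeline>, KeyHash> cache;
};

int main() {
    PipelineCache cache;
    const Key a{0x1234};
    cache.Get(a); // compiles
    cache.Get(a); // fast path, no compile
}
```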