diff options
| author | 2021-04-26 03:53:26 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:29 -0400 | |
| commit | 025b20f96ae588777e3ff11083cc4184bf418af6 (patch) | |
| tree | 7cda9932a219409196adfc8a8d7d5793840657c1 /src/video_core/renderer_vulkan | |
| parent | vulkan: Defer descriptor set work to the Vulkan thread (diff) | |
| download | yuzu-025b20f96ae588777e3ff11083cc4184bf418af6.tar.gz yuzu-025b20f96ae588777e3ff11083cc4184bf418af6.tar.xz yuzu-025b20f96ae588777e3ff11083cc4184bf418af6.zip | |
shader: Move pipeline cache logic to separate files
Move code to separate files to be able to reuse it from OpenGL. This
greatly simplifies the pipeline cache logic on Vulkan.
Transform feedback state is not yet abstracted and it's still
intrusively stored inside vk_pipeline_cache. It will be moved when
needed on OpenGL.
Diffstat (limited to 'src/video_core/renderer_vulkan')
4 files changed, 104 insertions, 649 deletions
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | namespace Vulkan { | 23 | namespace Vulkan { |
| 24 | 24 | ||
| 25 | struct GraphicsPipelineCacheKey { | 25 | struct GraphicsPipelineCacheKey { |
| 26 | std::array<u128, 6> unique_hashes; | 26 | std::array<u64, 6> unique_hashes; |
| 27 | FixedPipelineState state; | 27 | FixedPipelineState state; |
| 28 | 28 | ||
| 29 | size_t Hash() const noexcept; | 29 | size_t Hash() const noexcept; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -11,7 +11,8 @@ | |||
| 11 | 11 | ||
| 12 | #include "common/bit_cast.h" | 12 | #include "common/bit_cast.h" |
| 13 | #include "common/cityhash.h" | 13 | #include "common/cityhash.h" |
| 14 | #include "common/file_util.h" | 14 | #include "common/fs/fs.h" |
| 15 | #include "common/fs/path_util.h" | ||
| 15 | #include "common/microprofile.h" | 16 | #include "common/microprofile.h" |
| 16 | #include "common/thread_worker.h" | 17 | #include "common/thread_worker.h" |
| 17 | #include "core/core.h" | 18 | #include "core/core.h" |
| @@ -36,6 +37,7 @@ | |||
| 36 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 37 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 37 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 38 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 38 | #include "video_core/shader_cache.h" | 39 | #include "video_core/shader_cache.h" |
| 40 | #include "video_core/shader_environment.h" | ||
| 39 | #include "video_core/shader_notify.h" | 41 | #include "video_core/shader_notify.h" |
| 40 | #include "video_core/vulkan_common/vulkan_device.h" | 42 | #include "video_core/vulkan_common/vulkan_device.h" |
| 41 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 43 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -43,449 +45,19 @@ | |||
| 43 | namespace Vulkan { | 45 | namespace Vulkan { |
| 44 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); | 46 | MICROPROFILE_DECLARE(Vulkan_PipelineCache); |
| 45 | 47 | ||
| 46 | template <typename Container> | ||
| 47 | auto MakeSpan(Container& container) { | ||
| 48 | return std::span(container.data(), container.size()); | ||
| 49 | } | ||
| 50 | |||
| 51 | static u64 MakeCbufKey(u32 index, u32 offset) { | ||
| 52 | return (static_cast<u64>(index) << 32) | offset; | ||
| 53 | } | ||
| 54 | |||
| 55 | class GenericEnvironment : public Shader::Environment { | ||
| 56 | public: | ||
| 57 | explicit GenericEnvironment() = default; | ||
| 58 | explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 59 | u32 start_address_) | ||
| 60 | : gpu_memory{&gpu_memory_}, program_base{program_base_} { | ||
| 61 | start_address = start_address_; | ||
| 62 | } | ||
| 63 | |||
| 64 | ~GenericEnvironment() override = default; | ||
| 65 | |||
| 66 | u32 TextureBoundBuffer() const final { | ||
| 67 | return texture_bound; | ||
| 68 | } | ||
| 69 | |||
| 70 | u32 LocalMemorySize() const final { | ||
| 71 | return local_memory_size; | ||
| 72 | } | ||
| 73 | |||
| 74 | u32 SharedMemorySize() const final { | ||
| 75 | return shared_memory_size; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::array<u32, 3> WorkgroupSize() const final { | ||
| 79 | return workgroup_size; | ||
| 80 | } | ||
| 81 | |||
| 82 | u64 ReadInstruction(u32 address) final { | ||
| 83 | read_lowest = std::min(read_lowest, address); | ||
| 84 | read_highest = std::max(read_highest, address); | ||
| 85 | |||
| 86 | if (address >= cached_lowest && address < cached_highest) { | ||
| 87 | return code[(address - cached_lowest) / INST_SIZE]; | ||
| 88 | } | ||
| 89 | has_unbound_instructions = true; | ||
| 90 | return gpu_memory->Read<u64>(program_base + address); | ||
| 91 | } | ||
| 92 | |||
| 93 | std::optional<u128> Analyze() { | ||
| 94 | const std::optional<u64> size{TryFindSize()}; | ||
| 95 | if (!size) { | ||
| 96 | return std::nullopt; | ||
| 97 | } | ||
| 98 | cached_lowest = start_address; | ||
| 99 | cached_highest = start_address + static_cast<u32>(*size); | ||
| 100 | return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size); | ||
| 101 | } | ||
| 102 | |||
| 103 | void SetCachedSize(size_t size_bytes) { | ||
| 104 | cached_lowest = start_address; | ||
| 105 | cached_highest = start_address + static_cast<u32>(size_bytes); | ||
| 106 | code.resize(CachedSize()); | ||
| 107 | gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); | ||
| 108 | } | ||
| 109 | |||
| 110 | [[nodiscard]] size_t CachedSize() const noexcept { | ||
| 111 | return cached_highest - cached_lowest + INST_SIZE; | ||
| 112 | } | ||
| 113 | |||
| 114 | [[nodiscard]] size_t ReadSize() const noexcept { | ||
| 115 | return read_highest - read_lowest + INST_SIZE; | ||
| 116 | } | ||
| 117 | |||
| 118 | [[nodiscard]] bool CanBeSerialized() const noexcept { | ||
| 119 | return !has_unbound_instructions; | ||
| 120 | } | ||
| 121 | |||
| 122 | [[nodiscard]] u128 CalculateHash() const { | ||
| 123 | const size_t size{ReadSize()}; | ||
| 124 | const auto data{std::make_unique<char[]>(size)}; | ||
| 125 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); | ||
| 126 | return Common::CityHash128(data.get(), size); | ||
| 127 | } | ||
| 128 | |||
| 129 | void Serialize(std::ofstream& file) const { | ||
| 130 | const u64 code_size{static_cast<u64>(CachedSize())}; | ||
| 131 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||
| 132 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||
| 133 | |||
| 134 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||
| 135 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 136 | .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 137 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 138 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||
| 139 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | ||
| 140 | .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) | ||
| 141 | .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) | ||
| 142 | .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) | ||
| 143 | .write(reinterpret_cast<const char*>(code.data()), code_size); | ||
| 144 | for (const auto [key, type] : texture_types) { | ||
| 145 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 146 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 147 | } | ||
| 148 | for (const auto [key, type] : cbuf_values) { | ||
| 149 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 150 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 151 | } | ||
| 152 | if (stage == Shader::Stage::Compute) { | ||
| 153 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 154 | .write(reinterpret_cast<const char*>(&shared_memory_size), | ||
| 155 | sizeof(shared_memory_size)); | ||
| 156 | } else { | ||
| 157 | file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | protected: | ||
| 162 | static constexpr size_t INST_SIZE = sizeof(u64); | ||
| 163 | |||
| 164 | std::optional<u64> TryFindSize() { | ||
| 165 | constexpr size_t BLOCK_SIZE = 0x1000; | ||
| 166 | constexpr size_t MAXIMUM_SIZE = 0x100000; | ||
| 167 | |||
| 168 | constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; | ||
| 169 | constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; | ||
| 170 | |||
| 171 | GPUVAddr guest_addr{program_base + start_address}; | ||
| 172 | size_t offset{0}; | ||
| 173 | size_t size{BLOCK_SIZE}; | ||
| 174 | while (size <= MAXIMUM_SIZE) { | ||
| 175 | code.resize(size / INST_SIZE); | ||
| 176 | u64* const data = code.data() + offset / INST_SIZE; | ||
| 177 | gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); | ||
| 178 | for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { | ||
| 179 | const u64 inst = data[index / INST_SIZE]; | ||
| 180 | if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { | ||
| 181 | return offset + index; | ||
| 182 | } | ||
| 183 | } | ||
| 184 | guest_addr += BLOCK_SIZE; | ||
| 185 | size += BLOCK_SIZE; | ||
| 186 | offset += BLOCK_SIZE; | ||
| 187 | } | ||
| 188 | return std::nullopt; | ||
| 189 | } | ||
| 190 | |||
| 191 | Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, | ||
| 192 | u32 raw) { | ||
| 193 | const TextureHandle handle{raw, via_header_index}; | ||
| 194 | const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; | ||
| 195 | Tegra::Texture::TICEntry entry; | ||
| 196 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | ||
| 197 | |||
| 198 | const Shader::TextureType result{[&] { | ||
| 199 | switch (entry.texture_type) { | ||
| 200 | case Tegra::Texture::TextureType::Texture1D: | ||
| 201 | return Shader::TextureType::Color1D; | ||
| 202 | case Tegra::Texture::TextureType::Texture2D: | ||
| 203 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 204 | return Shader::TextureType::Color2D; | ||
| 205 | case Tegra::Texture::TextureType::Texture3D: | ||
| 206 | return Shader::TextureType::Color3D; | ||
| 207 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 208 | return Shader::TextureType::ColorCube; | ||
| 209 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 210 | return Shader::TextureType::ColorArray1D; | ||
| 211 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 212 | return Shader::TextureType::ColorArray2D; | ||
| 213 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 214 | return Shader::TextureType::Buffer; | ||
| 215 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 216 | return Shader::TextureType::ColorArrayCube; | ||
| 217 | default: | ||
| 218 | throw Shader::NotImplementedException("Unknown texture type"); | ||
| 219 | } | ||
| 220 | }()}; | ||
| 221 | texture_types.emplace(raw, result); | ||
| 222 | return result; | ||
| 223 | } | ||
| 224 | |||
| 225 | Tegra::MemoryManager* gpu_memory{}; | ||
| 226 | GPUVAddr program_base{}; | ||
| 227 | |||
| 228 | std::vector<u64> code; | ||
| 229 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 230 | std::unordered_map<u64, u32> cbuf_values; | ||
| 231 | |||
| 232 | u32 local_memory_size{}; | ||
| 233 | u32 texture_bound{}; | ||
| 234 | u32 shared_memory_size{}; | ||
| 235 | std::array<u32, 3> workgroup_size{}; | ||
| 236 | |||
| 237 | u32 read_lowest = std::numeric_limits<u32>::max(); | ||
| 238 | u32 read_highest = 0; | ||
| 239 | |||
| 240 | u32 cached_lowest = std::numeric_limits<u32>::max(); | ||
| 241 | u32 cached_highest = 0; | ||
| 242 | |||
| 243 | bool has_unbound_instructions = false; | ||
| 244 | }; | ||
| 245 | |||
| 246 | namespace { | 48 | namespace { |
| 247 | using Shader::Backend::SPIRV::EmitSPIRV; | 49 | using Shader::Backend::SPIRV::EmitSPIRV; |
| 248 | using Shader::Maxwell::TranslateProgram; | 50 | using Shader::Maxwell::TranslateProgram; |
| 51 | using VideoCommon::ComputeEnvironment; | ||
| 52 | using VideoCommon::FileEnvironment; | ||
| 53 | using VideoCommon::GenericEnvironment; | ||
| 54 | using VideoCommon::GraphicsEnvironment; | ||
| 249 | 55 | ||
| 250 | // TODO: Move this to a separate file | 56 | template <typename Container> |
| 251 | constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; | 57 | auto MakeSpan(Container& container) { |
| 252 | constexpr u32 CACHE_VERSION{2}; | 58 | return std::span(container.data(), container.size()); |
| 253 | |||
| 254 | class GraphicsEnvironment final : public GenericEnvironment { | ||
| 255 | public: | ||
| 256 | explicit GraphicsEnvironment() = default; | ||
| 257 | explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 258 | Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, | ||
| 259 | GPUVAddr program_base_, u32 start_address_) | ||
| 260 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { | ||
| 261 | gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); | ||
| 262 | switch (program) { | ||
| 263 | case Maxwell::ShaderProgram::VertexA: | ||
| 264 | stage = Shader::Stage::VertexA; | ||
| 265 | stage_index = 0; | ||
| 266 | break; | ||
| 267 | case Maxwell::ShaderProgram::VertexB: | ||
| 268 | stage = Shader::Stage::VertexB; | ||
| 269 | stage_index = 0; | ||
| 270 | break; | ||
| 271 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 272 | stage = Shader::Stage::TessellationControl; | ||
| 273 | stage_index = 1; | ||
| 274 | break; | ||
| 275 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 276 | stage = Shader::Stage::TessellationEval; | ||
| 277 | stage_index = 2; | ||
| 278 | break; | ||
| 279 | case Maxwell::ShaderProgram::Geometry: | ||
| 280 | stage = Shader::Stage::Geometry; | ||
| 281 | stage_index = 3; | ||
| 282 | break; | ||
| 283 | case Maxwell::ShaderProgram::Fragment: | ||
| 284 | stage = Shader::Stage::Fragment; | ||
| 285 | stage_index = 4; | ||
| 286 | break; | ||
| 287 | default: | ||
| 288 | UNREACHABLE_MSG("Invalid program={}", program); | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | const u64 local_size{sph.LocalMemorySize()}; | ||
| 292 | ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||
| 293 | local_memory_size = static_cast<u32>(local_size); | ||
| 294 | texture_bound = maxwell3d->regs.tex_cb_index; | ||
| 295 | } | ||
| 296 | |||
| 297 | ~GraphicsEnvironment() override = default; | ||
| 298 | |||
| 299 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { | ||
| 300 | const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; | ||
| 301 | ASSERT(cbuf.enabled); | ||
| 302 | u32 value{}; | ||
| 303 | if (cbuf_offset < cbuf.size) { | ||
| 304 | value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); | ||
| 305 | } | ||
| 306 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 307 | return value; | ||
| 308 | } | ||
| 309 | |||
| 310 | Shader::TextureType ReadTextureType(u32 handle) override { | ||
| 311 | const auto& regs{maxwell3d->regs}; | ||
| 312 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 313 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); | ||
| 314 | } | ||
| 315 | |||
| 316 | private: | ||
| 317 | Tegra::Engines::Maxwell3D* maxwell3d{}; | ||
| 318 | size_t stage_index{}; | ||
| 319 | }; | ||
| 320 | |||
| 321 | class ComputeEnvironment final : public GenericEnvironment { | ||
| 322 | public: | ||
| 323 | explicit ComputeEnvironment() = default; | ||
| 324 | explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 325 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 326 | u32 start_address_) | ||
| 327 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ | ||
| 328 | &kepler_compute_} { | ||
| 329 | const auto& qmd{kepler_compute->launch_description}; | ||
| 330 | stage = Shader::Stage::Compute; | ||
| 331 | local_memory_size = qmd.local_pos_alloc; | ||
| 332 | texture_bound = kepler_compute->regs.tex_cb_index; | ||
| 333 | shared_memory_size = qmd.shared_alloc; | ||
| 334 | workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 335 | } | ||
| 336 | |||
| 337 | ~ComputeEnvironment() override = default; | ||
| 338 | |||
| 339 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { | ||
| 340 | const auto& qmd{kepler_compute->launch_description}; | ||
| 341 | ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); | ||
| 342 | const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; | ||
| 343 | u32 value{}; | ||
| 344 | if (cbuf_offset < cbuf.size) { | ||
| 345 | value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); | ||
| 346 | } | ||
| 347 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 348 | return value; | ||
| 349 | } | ||
| 350 | |||
| 351 | Shader::TextureType ReadTextureType(u32 handle) override { | ||
| 352 | const auto& regs{kepler_compute->regs}; | ||
| 353 | const auto& qmd{kepler_compute->launch_description}; | ||
| 354 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); | ||
| 355 | } | ||
| 356 | |||
| 357 | private: | ||
| 358 | Tegra::Engines::KeplerCompute* kepler_compute{}; | ||
| 359 | }; | ||
| 360 | |||
| 361 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||
| 362 | std::ofstream& file) { | ||
| 363 | if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { | ||
| 364 | return; | ||
| 365 | } | ||
| 366 | const u32 num_envs{static_cast<u32>(envs.size())}; | ||
| 367 | file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); | ||
| 368 | for (const GenericEnvironment* const env : envs) { | ||
| 369 | env->Serialize(file); | ||
| 370 | } | ||
| 371 | file.write(key.data(), key.size_bytes()); | ||
| 372 | } | ||
| 373 | |||
| 374 | template <typename Key, typename Envs> | ||
| 375 | void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { | ||
| 376 | try { | ||
| 377 | std::ofstream file; | ||
| 378 | file.exceptions(std::ifstream::failbit); | ||
| 379 | Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); | ||
| 380 | if (!file.is_open()) { | ||
| 381 | LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); | ||
| 382 | return; | ||
| 383 | } | ||
| 384 | if (file.tellp() == 0) { | ||
| 385 | file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) | ||
| 386 | .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION)); | ||
| 387 | } | ||
| 388 | const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key)); | ||
| 389 | SerializePipeline(key_span, MakeSpan(envs), file); | ||
| 390 | |||
| 391 | } catch (const std::ios_base::failure& e) { | ||
| 392 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 393 | if (!Common::FS::Delete(filename)) { | ||
| 394 | LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); | ||
| 395 | } | ||
| 396 | } | ||
| 397 | } | 59 | } |
| 398 | 60 | ||
| 399 | class FileEnvironment final : public Shader::Environment { | ||
| 400 | public: | ||
| 401 | void Deserialize(std::ifstream& file) { | ||
| 402 | u64 code_size{}; | ||
| 403 | u64 num_texture_types{}; | ||
| 404 | u64 num_cbuf_values{}; | ||
| 405 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||
| 406 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 407 | .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 408 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 409 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||
| 410 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | ||
| 411 | .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) | ||
| 412 | .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) | ||
| 413 | .read(reinterpret_cast<char*>(&stage), sizeof(stage)); | ||
| 414 | code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); | ||
| 415 | file.read(reinterpret_cast<char*>(code.get()), code_size); | ||
| 416 | for (size_t i = 0; i < num_texture_types; ++i) { | ||
| 417 | u32 key; | ||
| 418 | Shader::TextureType type; | ||
| 419 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 420 | .read(reinterpret_cast<char*>(&type), sizeof(type)); | ||
| 421 | texture_types.emplace(key, type); | ||
| 422 | } | ||
| 423 | for (size_t i = 0; i < num_cbuf_values; ++i) { | ||
| 424 | u64 key; | ||
| 425 | u32 value; | ||
| 426 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 427 | .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||
| 428 | cbuf_values.emplace(key, value); | ||
| 429 | } | ||
| 430 | if (stage == Shader::Stage::Compute) { | ||
| 431 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 432 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 433 | } else { | ||
| 434 | file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); | ||
| 435 | } | ||
| 436 | } | ||
| 437 | |||
| 438 | u64 ReadInstruction(u32 address) override { | ||
| 439 | if (address < read_lowest || address > read_highest) { | ||
| 440 | throw Shader::LogicError("Out of bounds address {}", address); | ||
| 441 | } | ||
| 442 | return code[(address - read_lowest) / sizeof(u64)]; | ||
| 443 | } | ||
| 444 | |||
| 445 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { | ||
| 446 | const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; | ||
| 447 | if (it == cbuf_values.end()) { | ||
| 448 | throw Shader::LogicError("Uncached read texture type"); | ||
| 449 | } | ||
| 450 | return it->second; | ||
| 451 | } | ||
| 452 | |||
| 453 | Shader::TextureType ReadTextureType(u32 handle) override { | ||
| 454 | const auto it{texture_types.find(handle)}; | ||
| 455 | if (it == texture_types.end()) { | ||
| 456 | throw Shader::LogicError("Uncached read texture type"); | ||
| 457 | } | ||
| 458 | return it->second; | ||
| 459 | } | ||
| 460 | |||
| 461 | u32 LocalMemorySize() const override { | ||
| 462 | return local_memory_size; | ||
| 463 | } | ||
| 464 | |||
| 465 | u32 SharedMemorySize() const override { | ||
| 466 | return shared_memory_size; | ||
| 467 | } | ||
| 468 | |||
| 469 | u32 TextureBoundBuffer() const override { | ||
| 470 | return texture_bound; | ||
| 471 | } | ||
| 472 | |||
| 473 | std::array<u32, 3> WorkgroupSize() const override { | ||
| 474 | return workgroup_size; | ||
| 475 | } | ||
| 476 | |||
| 477 | private: | ||
| 478 | std::unique_ptr<u64[]> code; | ||
| 479 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 480 | std::unordered_map<u64, u32> cbuf_values; | ||
| 481 | std::array<u32, 3> workgroup_size{}; | ||
| 482 | u32 local_memory_size{}; | ||
| 483 | u32 shared_memory_size{}; | ||
| 484 | u32 texture_bound{}; | ||
| 485 | u32 read_lowest{}; | ||
| 486 | u32 read_highest{}; | ||
| 487 | }; | ||
| 488 | |||
| 489 | Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { | 61 | Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { |
| 490 | switch (comparison) { | 62 | switch (comparison) { |
| 491 | case Maxwell::ComparisonOp::Never: | 63 | case Maxwell::ComparisonOp::Never: |
| @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso | |||
| 518 | } | 90 | } |
| 519 | } // Anonymous namespace | 91 | } // Anonymous namespace |
| 520 | 92 | ||
| 521 | void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||
| 522 | const VideoCore::DiskResourceLoadCallback& callback) { | ||
| 523 | if (title_id == 0) { | ||
| 524 | return; | ||
| 525 | } | ||
| 526 | std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; | ||
| 527 | std::string base_dir{shader_dir + "/vulkan"}; | ||
| 528 | std::string transferable_dir{base_dir + "/transferable"}; | ||
| 529 | std::string precompiled_dir{base_dir + "/precompiled"}; | ||
| 530 | if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || | ||
| 531 | !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { | ||
| 532 | LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); | ||
| 533 | return; | ||
| 534 | } | ||
| 535 | pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); | ||
| 536 | |||
| 537 | struct { | ||
| 538 | std::mutex mutex; | ||
| 539 | size_t total{0}; | ||
| 540 | size_t built{0}; | ||
| 541 | bool has_loaded{false}; | ||
| 542 | } state; | ||
| 543 | |||
| 544 | std::ifstream file; | ||
| 545 | Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); | ||
| 546 | if (!file.is_open()) { | ||
| 547 | return; | ||
| 548 | } | ||
| 549 | file.exceptions(std::ifstream::failbit); | ||
| 550 | const auto end{file.tellg()}; | ||
| 551 | file.seekg(0, std::ios::beg); | ||
| 552 | |||
| 553 | std::array<char, 8> magic_number; | ||
| 554 | u32 cache_version; | ||
| 555 | file.read(magic_number.data(), magic_number.size()) | ||
| 556 | .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); | ||
| 557 | if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { | ||
| 558 | file.close(); | ||
| 559 | if (Common::FS::Delete(pipeline_cache_filename)) { | ||
| 560 | if (magic_number != MAGIC_NUMBER) { | ||
| 561 | LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); | ||
| 562 | } | ||
| 563 | if (cache_version != CACHE_VERSION) { | ||
| 564 | LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); | ||
| 565 | } | ||
| 566 | } else { | ||
| 567 | LOG_ERROR(Render_Vulkan, | ||
| 568 | "Invalid pipeline cache file and failed to delete it in \"{}\"", | ||
| 569 | pipeline_cache_filename); | ||
| 570 | } | ||
| 571 | return; | ||
| 572 | } | ||
| 573 | while (file.tellg() != end) { | ||
| 574 | if (stop_loading) { | ||
| 575 | return; | ||
| 576 | } | ||
| 577 | u32 num_envs{}; | ||
| 578 | file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); | ||
| 579 | std::vector<FileEnvironment> envs(num_envs); | ||
| 580 | for (FileEnvironment& env : envs) { | ||
| 581 | env.Deserialize(file); | ||
| 582 | } | ||
| 583 | if (envs.front().ShaderStage() == Shader::Stage::Compute) { | ||
| 584 | ComputePipelineCacheKey key; | ||
| 585 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 586 | |||
| 587 | workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { | ||
| 588 | ShaderPools pools; | ||
| 589 | auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; | ||
| 590 | |||
| 591 | std::lock_guard lock{state.mutex}; | ||
| 592 | compute_cache.emplace(key, std::move(pipeline)); | ||
| 593 | ++state.built; | ||
| 594 | if (state.has_loaded) { | ||
| 595 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 596 | } | ||
| 597 | }); | ||
| 598 | } else { | ||
| 599 | GraphicsPipelineCacheKey key; | ||
| 600 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 601 | |||
| 602 | workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { | ||
| 603 | ShaderPools pools; | ||
| 604 | boost::container::static_vector<Shader::Environment*, 5> env_ptrs; | ||
| 605 | for (auto& env : envs) { | ||
| 606 | env_ptrs.push_back(&env); | ||
| 607 | } | ||
| 608 | auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; | ||
| 609 | |||
| 610 | std::lock_guard lock{state.mutex}; | ||
| 611 | graphics_cache.emplace(key, std::move(pipeline)); | ||
| 612 | ++state.built; | ||
| 613 | if (state.has_loaded) { | ||
| 614 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 615 | } | ||
| 616 | }); | ||
| 617 | } | ||
| 618 | ++state.total; | ||
| 619 | } | ||
| 620 | { | ||
| 621 | std::lock_guard lock{state.mutex}; | ||
| 622 | callback(VideoCore::LoadCallbackStage::Build, 0, state.total); | ||
| 623 | state.has_loaded = true; | ||
| 624 | } | ||
| 625 | workers.WaitForRequests(); | ||
| 626 | } | ||
| 627 | |||
| 628 | size_t ComputePipelineCacheKey::Hash() const noexcept { | 93 | size_t ComputePipelineCacheKey::Hash() const noexcept { |
| 629 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); | 94 | const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); |
| 630 | return static_cast<size_t>(hash); | 95 | return static_cast<size_t>(hash); |
| @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c | |||
| 643 | return std::memcmp(&rhs, this, Size()) == 0; | 108 | return std::memcmp(&rhs, this, Size()) == 0; |
| 644 | } | 109 | } |
| 645 | 110 | ||
| 646 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | 111 | PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, |
| 647 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 648 | Tegra::Engines::KeplerCompute& kepler_compute_, | 112 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 649 | Tegra::MemoryManager& gpu_memory_, const Device& device_, | 113 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 650 | VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, | 114 | VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, |
| 651 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 115 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 652 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, | 116 | RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, |
| 653 | TextureCache& texture_cache_) | 117 | TextureCache& texture_cache_) |
| 654 | : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, | 118 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, |
| 655 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, | 119 | device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, |
| 656 | scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, | ||
| 657 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, | 120 | update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, |
| 658 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, | 121 | buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, |
| 659 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), | 122 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), |
| @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; | |||
| 700 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { | 163 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { |
| 701 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 164 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 702 | 165 | ||
| 703 | if (!RefreshStages()) { | 166 | if (!RefreshStages(graphics_key.unique_hashes)) { |
| 704 | current_pipeline = nullptr; | 167 | current_pipeline = nullptr; |
| 705 | return nullptr; | 168 | return nullptr; |
| 706 | } | 169 | } |
| @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { | |||
| 728 | ComputePipeline* PipelineCache::CurrentComputePipeline() { | 191 | ComputePipeline* PipelineCache::CurrentComputePipeline() { |
| 729 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 192 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| 730 | 193 | ||
| 731 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | 194 | const ShaderInfo* const shader{ComputeShader()}; |
| 732 | const auto& qmd{kepler_compute.launch_description}; | ||
| 733 | const GPUVAddr shader_addr{program_base + qmd.program_start}; | ||
| 734 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 735 | if (!cpu_shader_addr) { | ||
| 736 | return nullptr; | ||
| 737 | } | ||
| 738 | const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; | ||
| 739 | if (!shader) { | 195 | if (!shader) { |
| 740 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | 196 | return nullptr; |
| 741 | shader = MakeShaderInfo(env, *cpu_shader_addr); | ||
| 742 | } | 197 | } |
| 198 | const auto& qmd{kepler_compute.launch_description}; | ||
| 743 | const ComputePipelineCacheKey key{ | 199 | const ComputePipelineCacheKey key{ |
| 744 | .unique_hash{shader->unique_hash}, | 200 | .unique_hash = shader->unique_hash, |
| 745 | .shared_memory_size{qmd.shared_alloc}, | 201 | .shared_memory_size = qmd.shared_alloc, |
| 746 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, | 202 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, |
| 747 | }; | 203 | }; |
| 748 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; | 204 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; |
| @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { | |||
| 754 | return pipeline.get(); | 210 | return pipeline.get(); |
| 755 | } | 211 | } |
| 756 | 212 | ||
| 757 | bool PipelineCache::RefreshStages() { | 213 | void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, |
| 758 | auto& dirty{maxwell3d.dirty.flags}; | 214 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 759 | if (!dirty[VideoCommon::Dirty::Shaders]) { | 215 | if (title_id == 0) { |
| 760 | return last_valid_shaders; | 216 | return; |
| 761 | } | 217 | } |
| 762 | dirty[VideoCommon::Dirty::Shaders] = false; | 218 | auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; |
| 763 | 219 | auto base_dir{shader_dir / "vulkan"}; | |
| 764 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 220 | auto transferable_dir{base_dir / "transferable"}; |
| 765 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 221 | auto precompiled_dir{base_dir / "precompiled"}; |
| 766 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | 222 | if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || |
| 767 | graphics_key.unique_hashes[index] = u128{}; | 223 | !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { |
| 768 | continue; | 224 | LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); |
| 769 | } | 225 | return; |
| 770 | const auto& shader_config{maxwell3d.regs.shader_config[index]}; | ||
| 771 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 772 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | ||
| 773 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 774 | if (!cpu_shader_addr) { | ||
| 775 | LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 776 | last_valid_shaders = false; | ||
| 777 | return false; | ||
| 778 | } | ||
| 779 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; | ||
| 780 | if (!shader_info) { | ||
| 781 | const u32 start_address{shader_config.offset}; | ||
| 782 | GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 783 | shader_info = MakeShaderInfo(env, *cpu_shader_addr); | ||
| 784 | } | ||
| 785 | shader_infos[index] = shader_info; | ||
| 786 | graphics_key.unique_hashes[index] = shader_info->unique_hash; | ||
| 787 | } | 226 | } |
| 788 | last_valid_shaders = true; | 227 | pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); |
| 789 | return true; | ||
| 790 | } | ||
| 791 | 228 | ||
| 792 | const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { | 229 | struct { |
| 793 | auto info = std::make_unique<ShaderInfo>(); | 230 | std::mutex mutex; |
| 794 | if (const std::optional<u128> cached_hash{env.Analyze()}) { | 231 | size_t total{0}; |
| 795 | info->unique_hash = *cached_hash; | 232 | size_t built{0}; |
| 796 | info->size_bytes = env.CachedSize(); | 233 | bool has_loaded{false}; |
| 797 | } else { | 234 | } state; |
| 798 | // Slow path, not really hit on commercial games | 235 | |
| 799 | // Build a control flow graph to get the real shader size | 236 | const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { |
| 800 | main_pools.flow_block.ReleaseContents(); | 237 | ComputePipelineCacheKey key; |
| 801 | Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; | 238 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); |
| 802 | info->unique_hash = env.CalculateHash(); | 239 | |
| 803 | info->size_bytes = env.ReadSize(); | 240 | workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { |
| 804 | } | 241 | ShaderPools pools; |
| 805 | const size_t size_bytes{info->size_bytes}; | 242 | auto pipeline{CreateComputePipeline(pools, key, env, false)}; |
| 806 | const ShaderInfo* const result{info.get()}; | 243 | |
| 807 | Register(std::move(info), cpu_addr, size_bytes); | 244 | std::lock_guard lock{state.mutex}; |
| 808 | return result; | 245 | compute_cache.emplace(key, std::move(pipeline)); |
| 246 | ++state.built; | ||
| 247 | if (state.has_loaded) { | ||
| 248 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 249 | } | ||
| 250 | }); | ||
| 251 | ++state.total; | ||
| 252 | }}; | ||
| 253 | const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { | ||
| 254 | GraphicsPipelineCacheKey key; | ||
| 255 | file.read(reinterpret_cast<char*>(&key), sizeof(key)); | ||
| 256 | |||
| 257 | workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { | ||
| 258 | ShaderPools pools; | ||
| 259 | boost::container::static_vector<Shader::Environment*, 5> env_ptrs; | ||
| 260 | for (auto& env : envs) { | ||
| 261 | env_ptrs.push_back(&env); | ||
| 262 | } | ||
| 263 | auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; | ||
| 264 | |||
| 265 | std::lock_guard lock{state.mutex}; | ||
| 266 | graphics_cache.emplace(key, std::move(pipeline)); | ||
| 267 | ++state.built; | ||
| 268 | if (state.has_loaded) { | ||
| 269 | callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); | ||
| 270 | } | ||
| 271 | }); | ||
| 272 | ++state.total; | ||
| 273 | }}; | ||
| 274 | VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); | ||
| 275 | |||
| 276 | std::unique_lock lock{state.mutex}; | ||
| 277 | callback(VideoCore::LoadCallbackStage::Build, 0, state.total); | ||
| 278 | state.has_loaded = true; | ||
| 279 | lock.unlock(); | ||
| 280 | |||
| 281 | workers.WaitForRequests(); | ||
| 809 | } | 282 | } |
| 810 | 283 | ||
| 811 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | 284 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( |
| @@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 815 | size_t env_index{0}; | 288 | size_t env_index{0}; |
| 816 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | 289 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; |
| 817 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 290 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 818 | if (key.unique_hashes[index] == u128{}) { | 291 | if (key.unique_hashes[index] == 0) { |
| 819 | continue; | 292 | continue; |
| 820 | } | 293 | } |
| 821 | Shader::Environment& env{*envs[env_index]}; | 294 | Shader::Environment& env{*envs[env_index]}; |
| @@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 830 | 303 | ||
| 831 | u32 binding{0}; | 304 | u32 binding{0}; |
| 832 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 305 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 833 | if (key.unique_hashes[index] == u128{}) { | 306 | if (key.unique_hashes[index] == 0) { |
| 834 | continue; | 307 | continue; |
| 835 | } | 308 | } |
| 836 | UNIMPLEMENTED_IF(index == 0); | 309 | UNIMPLEMENTED_IF(index == 0); |
| @@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 844 | device.SaveShader(code); | 317 | device.SaveShader(code); |
| 845 | modules[stage_index] = BuildShader(device, code); | 318 | modules[stage_index] = BuildShader(device, code); |
| 846 | if (device.HasDebuggingToolAttached()) { | 319 | if (device.HasDebuggingToolAttached()) { |
| 847 | const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], | 320 | const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; |
| 848 | key.unique_hashes[index][1])}; | ||
| 849 | modules[stage_index].SetObjectNameEXT(name.c_str()); | 321 | modules[stage_index].SetObjectNameEXT(name.c_str()); |
| 850 | } | 322 | } |
| 851 | } | 323 | } |
| @@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 863 | 335 | ||
| 864 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 336 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; |
| 865 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 337 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 866 | if (graphics_key.unique_hashes[index] == u128{}) { | 338 | if (graphics_key.unique_hashes[index] == 0) { |
| 867 | continue; | 339 | continue; |
| 868 | } | 340 | } |
| 869 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 341 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 871 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | 343 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; |
| 872 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | 344 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; |
| 873 | env.SetCachedSize(shader_infos[index]->size_bytes); | 345 | env.SetCachedSize(shader_infos[index]->size_bytes); |
| 874 | |||
| 875 | envs.push_back(&env); | 346 | envs.push_back(&env); |
| 876 | } | 347 | } |
| 877 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; | 348 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; |
| @@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | |||
| 882 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> | 353 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> |
| 883 | env_ptrs; | 354 | env_ptrs; |
| 884 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 355 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 885 | if (key.unique_hashes[index] != u128{}) { | 356 | if (key.unique_hashes[index] != 0) { |
| 886 | env_ptrs.push_back(&envs[index]); | 357 | env_ptrs.push_back(&envs[index]); |
| 887 | } | 358 | } |
| 888 | } | 359 | } |
| 889 | SerializePipeline(key, env_ptrs, pipeline_cache_filename); | 360 | VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); |
| 890 | }); | 361 | }); |
| 891 | return pipeline; | 362 | return pipeline; |
| 892 | } | 363 | } |
| @@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 902 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; | 373 | auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; |
| 903 | if (!pipeline_cache_filename.empty()) { | 374 | if (!pipeline_cache_filename.empty()) { |
| 904 | serialization_thread.QueueWork([this, key, env = std::move(env)] { | 375 | serialization_thread.QueueWork([this, key, env = std::move(env)] { |
| 905 | SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, | 376 | VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, |
| 906 | pipeline_cache_filename); | 377 | pipeline_cache_filename); |
| 907 | }); | 378 | }); |
| 908 | } | 379 | } |
| 909 | return pipeline; | 380 | return pipeline; |
| @@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 921 | device.SaveShader(code); | 392 | device.SaveShader(code); |
| 922 | vk::ShaderModule spv_module{BuildShader(device, code)}; | 393 | vk::ShaderModule spv_module{BuildShader(device, code)}; |
| 923 | if (device.HasDebuggingToolAttached()) { | 394 | if (device.HasDebuggingToolAttached()) { |
| 924 | const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; | 395 | const auto name{fmt::format("{:016x}", key.unique_hash)}; |
| 925 | spv_module.SetObjectNameEXT(name.c_str()); | 396 | spv_module.SetObjectNameEXT(name.c_str()); |
| 926 | } | 397 | } |
| 927 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | 398 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; |
| @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, | |||
| 1035 | Shader::Profile profile{base_profile}; | 506 | Shader::Profile profile{base_profile}; |
| 1036 | 507 | ||
| 1037 | const Shader::Stage stage{program.stage}; | 508 | const Shader::Stage stage{program.stage}; |
| 1038 | const bool has_geometry{key.unique_hashes[4] != u128{}}; | 509 | const bool has_geometry{key.unique_hashes[4] != 0}; |
| 1039 | const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; | 510 | const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; |
| 1040 | const float point_size{Common::BitCast<float>(key.state.point_size)}; | 511 | const float point_size{Common::BitCast<float>(key.state.point_size)}; |
| 1041 | switch (stage) { | 512 | switch (stage) { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | 8 | #include <cstddef> |
| 9 | #include <filesystem> | ||
| 9 | #include <iosfwd> | 10 | #include <iosfwd> |
| 10 | #include <memory> | 11 | #include <memory> |
| 11 | #include <type_traits> | 12 | #include <type_traits> |
| @@ -42,7 +43,7 @@ namespace Vulkan { | |||
| 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 43 | 44 | ||
| 44 | struct ComputePipelineCacheKey { | 45 | struct ComputePipelineCacheKey { |
| 45 | u128 unique_hash; | 46 | u64 unique_hash; |
| 46 | u32 shared_memory_size; | 47 | u32 shared_memory_size; |
| 47 | std::array<u32, 3> workgroup_size; | 48 | std::array<u32, 3> workgroup_size; |
| 48 | 49 | ||
| @@ -76,16 +77,12 @@ namespace Vulkan { | |||
| 76 | class ComputePipeline; | 77 | class ComputePipeline; |
| 77 | class Device; | 78 | class Device; |
| 78 | class DescriptorPool; | 79 | class DescriptorPool; |
| 79 | class GenericEnvironment; | ||
| 80 | class RasterizerVulkan; | 80 | class RasterizerVulkan; |
| 81 | class RenderPassCache; | 81 | class RenderPassCache; |
| 82 | class VKScheduler; | 82 | class VKScheduler; |
| 83 | class VKUpdateDescriptorQueue; | 83 | class VKUpdateDescriptorQueue; |
| 84 | 84 | ||
| 85 | struct ShaderInfo { | 85 | using VideoCommon::ShaderInfo; |
| 86 | u128 unique_hash{}; | ||
| 87 | size_t size_bytes{}; | ||
| 88 | }; | ||
| 89 | 86 | ||
| 90 | struct ShaderPools { | 87 | struct ShaderPools { |
| 91 | void ReleaseContents() { | 88 | void ReleaseContents() { |
| @@ -99,17 +96,16 @@ struct ShaderPools { | |||
| 99 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | 96 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; |
| 100 | }; | 97 | }; |
| 101 | 98 | ||
| 102 | class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { | 99 | class PipelineCache : public VideoCommon::ShaderCache { |
| 103 | public: | 100 | public: |
| 104 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, | 101 | explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, |
| 105 | Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 106 | Tegra::Engines::KeplerCompute& kepler_compute, | 102 | Tegra::Engines::KeplerCompute& kepler_compute, |
| 107 | Tegra::MemoryManager& gpu_memory, const Device& device, | 103 | Tegra::MemoryManager& gpu_memory, const Device& device, |
| 108 | VKScheduler& scheduler, DescriptorPool& descriptor_pool, | 104 | VKScheduler& scheduler, DescriptorPool& descriptor_pool, |
| 109 | VKUpdateDescriptorQueue& update_descriptor_queue, | 105 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 110 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, | 106 | RenderPassCache& render_pass_cache, BufferCache& buffer_cache, |
| 111 | TextureCache& texture_cache); | 107 | TextureCache& texture_cache); |
| 112 | ~PipelineCache() override; | 108 | ~PipelineCache(); |
| 113 | 109 | ||
| 114 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); | 110 | [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); |
| 115 | 111 | ||
| @@ -119,10 +115,6 @@ public: | |||
| 119 | const VideoCore::DiskResourceLoadCallback& callback); | 115 | const VideoCore::DiskResourceLoadCallback& callback); |
| 120 | 116 | ||
| 121 | private: | 117 | private: |
| 122 | bool RefreshStages(); | ||
| 123 | |||
| 124 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); | ||
| 125 | |||
| 126 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); | 118 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); |
| 127 | 119 | ||
| 128 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( | 120 | std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( |
| @@ -140,11 +132,6 @@ private: | |||
| 140 | Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, | 132 | Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, |
| 141 | const Shader::IR::Program& program); | 133 | const Shader::IR::Program& program); |
| 142 | 134 | ||
| 143 | Tegra::GPU& gpu; | ||
| 144 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 145 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 146 | Tegra::MemoryManager& gpu_memory; | ||
| 147 | |||
| 148 | const Device& device; | 135 | const Device& device; |
| 149 | VKScheduler& scheduler; | 136 | VKScheduler& scheduler; |
| 150 | DescriptorPool& descriptor_pool; | 137 | DescriptorPool& descriptor_pool; |
| @@ -156,16 +143,13 @@ private: | |||
| 156 | GraphicsPipelineCacheKey graphics_key{}; | 143 | GraphicsPipelineCacheKey graphics_key{}; |
| 157 | GraphicsPipeline* current_pipeline{}; | 144 | GraphicsPipeline* current_pipeline{}; |
| 158 | 145 | ||
| 159 | std::array<const ShaderInfo*, 6> shader_infos{}; | ||
| 160 | bool last_valid_shaders{}; | ||
| 161 | |||
| 162 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; | 146 | std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 163 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; | 147 | std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; |
| 164 | 148 | ||
| 165 | ShaderPools main_pools; | 149 | ShaderPools main_pools; |
| 166 | 150 | ||
| 167 | Shader::Profile base_profile; | 151 | Shader::Profile base_profile; |
| 168 | std::string pipeline_cache_filename; | 152 | std::filesystem::path pipeline_cache_filename; |
| 169 | 153 | ||
| 170 | Common::ThreadWorker workers; | 154 | Common::ThreadWorker workers; |
| 171 | Common::ThreadWorker serialization_thread; | 155 | Common::ThreadWorker serialization_thread; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 149 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 149 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 150 | update_descriptor_queue, descriptor_pool), | 150 | update_descriptor_queue, descriptor_pool), |
| 151 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 151 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 152 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 152 | pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 153 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, | 153 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, |
| 154 | texture_cache), | 154 | texture_cache), |
| 155 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, | 155 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, |