Diffstat:
 src/video_core/CMakeLists.txt                          |   3
 src/video_core/renderer_opengl/gl_rasterizer.h         |   2
 src/video_core/renderer_opengl/gl_shader_cache.cpp     |  21
 src/video_core/renderer_opengl/gl_shader_cache.h       |  58
 src/video_core/renderer_vulkan/vk_graphics_pipeline.h  |   2
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp   | 719
 src/video_core/renderer_vulkan/vk_pipeline_cache.h     |  30
 src/video_core/renderer_vulkan/vk_rasterizer.cpp       |   2
 src/video_core/shader_cache.cpp                        | 233
 src/video_core/shader_cache.h                          | 198
 src/video_core/shader_environment.cpp                  | 453
 src/video_core/shader_environment.h                    | 198
 12 files changed, 1095 insertions(+), 824 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3166a69dc..6e0e4b8f5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -145,7 +145,10 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
+    shader_cache.cpp
     shader_cache.h
+    shader_environment.cpp
+    shader_environment.h
     shader_notify.cpp
     shader_notify.h
     surface.cpp
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f58f8791..2fdcbe4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -217,7 +217,7 @@ private:
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
     BufferCache buffer_cache;
-    ShaderCacheOpenGL shader_cache;
+    ShaderCache shader_cache;
     QueryCache query_cache;
     AccelerateDMA accelerate_dma;
     FenceManagerOpenGL fence_manager;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4dd166156..c3e490b40 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,18 +29,13 @@
 
 namespace OpenGL {
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                     Tegra::Engines::Maxwell3D& maxwell3d_,
-                                     Tegra::Engines::KeplerCompute& kepler_compute_,
-                                     Tegra::MemoryManager& gpu_memory_, const Device& device_)
-    : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
-      maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_)
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
+
+ShaderCache::~ShaderCache() = default;
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index ad3d15a76..96520e17c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -36,27 +36,59 @@ class RasterizerOpenGL;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-class Shader {
+struct GraphicsProgramKey {
+    struct TransformFeedbackState {
+        struct Layout {
+            u32 stream;
+            u32 varying_count;
+            u32 stride;
+        };
+        std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
+        std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
+    };
+
+    std::array<u64, 6> unique_hashes;
+    std::array<u8, Maxwell::NumRenderTargets> color_formats;
+    union {
+        u32 raw;
+        BitField<0, 1, u32> xfb_enabled;
+        BitField<1, 1, u32> early_z;
+        BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+        BitField<6, 2, u32> tessellation_primitive;
+        BitField<8, 2, u32> tessellation_spacing;
+        BitField<10, 1, u32> tessellation_clockwise;
+    };
+    u32 padding;
+    TransformFeedbackState xfb_state;
+
+    [[nodiscard]] size_t Size() const noexcept {
+        if (xfb_enabled != 0) {
+            return sizeof(GraphicsProgramKey);
+        } else {
+            return offsetof(GraphicsProgramKey, padding);
+        }
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
+
+class GraphicsProgram {
 public:
-    explicit Shader();
-    ~Shader();
+private:
 };
 
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
+class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
-                               Tegra::Engines::Maxwell3D& maxwell3d_,
-                               Tegra::Engines::KeplerCompute& kepler_compute_,
-                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
-    ~ShaderCacheOpenGL() override;
+    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_);
+    ~ShaderCache();
 
 private:
     Core::Frontend::EmuWindow& emu_window;
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
    const Device& device;
 };
 
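Note on the new key type: GraphicsProgramKey is deliberately trivially copyable with unique object representations so it can be hashed and compared as raw bytes, and Size() stops at the padding member whenever transform feedback is disabled. A minimal sketch of how such a key is typically consumed (it mirrors GraphicsPipelineCacheKey::Hash() and operator== in vk_pipeline_cache.cpp below; the OpenGL cache's own hash functions are not part of this diff, and HashKey/KeysEqual are illustrative names):

    // Sketch only: hash/compare GraphicsProgramKey over its first Size() bytes.
    size_t HashKey(const GraphicsProgramKey& key) noexcept {
        // Without transform feedback, the xfb_state tail never enters the hash.
        const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(&key), key.Size());
        return static_cast<size_t>(hash);
    }

    bool KeysEqual(const GraphicsProgramKey& lhs, const GraphicsProgramKey& rhs) noexcept {
        // Safe even when sizes differ: xfb_enabled lives before the padding member,
        // so keys of different sizes already differ within the compared prefix.
        return std::memcmp(&lhs, &rhs, lhs.Size()) == 0;
    }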
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85e21f611..e362d13c5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -23,7 +23,7 @@
 namespace Vulkan {
 
 struct GraphicsPipelineCacheKey {
-    std::array<u128, 6> unique_hashes;
+    std::array<u64, 6> unique_hashes;
     FixedPipelineState state;
 
     size_t Hash() const noexcept;
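With the per-stage hashes narrowed from u128 to u64, a disabled stage is now encoded as a plain zero. For reference, the six slots follow Maxwell::ShaderProgram order; this mapping is inferred from the engine's program indices and the has_geometry check in MakeProfile further down, not stated explicitly in this diff:

    // unique_hashes slot per guest program:
    //   [0] VertexA   [1] VertexB   [2] TesselationControl
    //   [3] TesselationEval   [4] Geometry   [5] Fragment
    const bool has_geometry = key.unique_hashes[4] != 0; // zero hash == stage disabled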
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9d9729022..0822862fe 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -11,7 +11,8 @@
 
 #include "common/bit_cast.h"
 #include "common/cityhash.h"
-#include "common/file_util.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/microprofile.h"
 #include "common/thread_worker.h"
 #include "core/core.h"
@@ -36,6 +37,7 @@
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -43,449 +45,19 @@
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
-template <typename Container>
-auto MakeSpan(Container& container) {
-    return std::span(container.data(), container.size());
-}
-
-static u64 MakeCbufKey(u32 index, u32 offset) {
-    return (static_cast<u64>(index) << 32) | offset;
-}
-
-class GenericEnvironment : public Shader::Environment {
-public:
-    explicit GenericEnvironment() = default;
-    explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : gpu_memory{&gpu_memory_}, program_base{program_base_} {
-        start_address = start_address_;
-    }
-
-    ~GenericEnvironment() override = default;
-
-    u32 TextureBoundBuffer() const final {
-        return texture_bound;
-    }
-
-    u32 LocalMemorySize() const final {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const final {
-        return shared_memory_size;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const final {
-        return workgroup_size;
-    }
-
-    u64 ReadInstruction(u32 address) final {
-        read_lowest = std::min(read_lowest, address);
-        read_highest = std::max(read_highest, address);
-
-        if (address >= cached_lowest && address < cached_highest) {
-            return code[(address - cached_lowest) / INST_SIZE];
-        }
-        has_unbound_instructions = true;
-        return gpu_memory->Read<u64>(program_base + address);
-    }
-
-    std::optional<u128> Analyze() {
-        const std::optional<u64> size{TryFindSize()};
-        if (!size) {
-            return std::nullopt;
-        }
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(*size);
-        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
-    }
-
-    void SetCachedSize(size_t size_bytes) {
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(size_bytes);
-        code.resize(CachedSize());
-        gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
-    }
-
-    [[nodiscard]] size_t CachedSize() const noexcept {
-        return cached_highest - cached_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] size_t ReadSize() const noexcept {
-        return read_highest - read_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] bool CanBeSerialized() const noexcept {
-        return !has_unbound_instructions;
-    }
-
-    [[nodiscard]] u128 CalculateHash() const {
-        const size_t size{ReadSize()};
-        const auto data{std::make_unique<char[]>(size)};
-        gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
-        return Common::CityHash128(data.get(), size);
-    }
-
-    void Serialize(std::ofstream& file) const {
-        const u64 code_size{static_cast<u64>(CachedSize())};
-        const u64 num_texture_types{static_cast<u64>(texture_types.size())};
-        const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
-
-        file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
-            .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
-            .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
-            .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
-            .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
-            .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
-            .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
-            .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
-            .write(reinterpret_cast<const char*>(code.data()), code_size);
-        for (const auto [key, type] : texture_types) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        for (const auto [key, type] : cbuf_values) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
-                .write(reinterpret_cast<const char*>(&shared_memory_size),
-                       sizeof(shared_memory_size));
-        } else {
-            file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
-        }
-    }
-
-protected:
-    static constexpr size_t INST_SIZE = sizeof(u64);
-
-    std::optional<u64> TryFindSize() {
-        constexpr size_t BLOCK_SIZE = 0x1000;
-        constexpr size_t MAXIMUM_SIZE = 0x100000;
-
-        constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
-        constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
-
-        GPUVAddr guest_addr{program_base + start_address};
-        size_t offset{0};
-        size_t size{BLOCK_SIZE};
-        while (size <= MAXIMUM_SIZE) {
-            code.resize(size / INST_SIZE);
-            u64* const data = code.data() + offset / INST_SIZE;
-            gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
-            for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
-                const u64 inst = data[index / INST_SIZE];
-                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
-                    return offset + index;
-                }
-            }
-            guest_addr += BLOCK_SIZE;
-            size += BLOCK_SIZE;
-            offset += BLOCK_SIZE;
-        }
-        return std::nullopt;
-    }
-
-    Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
-                                            u32 raw) {
-        const TextureHandle handle{raw, via_header_index};
-        const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
-        Tegra::Texture::TICEntry entry;
-        gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
-
-        const Shader::TextureType result{[&] {
-            switch (entry.texture_type) {
-            case Tegra::Texture::TextureType::Texture1D:
-                return Shader::TextureType::Color1D;
-            case Tegra::Texture::TextureType::Texture2D:
-            case Tegra::Texture::TextureType::Texture2DNoMipmap:
-                return Shader::TextureType::Color2D;
-            case Tegra::Texture::TextureType::Texture3D:
-                return Shader::TextureType::Color3D;
-            case Tegra::Texture::TextureType::TextureCubemap:
-                return Shader::TextureType::ColorCube;
-            case Tegra::Texture::TextureType::Texture1DArray:
-                return Shader::TextureType::ColorArray1D;
-            case Tegra::Texture::TextureType::Texture2DArray:
-                return Shader::TextureType::ColorArray2D;
-            case Tegra::Texture::TextureType::Texture1DBuffer:
-                return Shader::TextureType::Buffer;
-            case Tegra::Texture::TextureType::TextureCubeArray:
-                return Shader::TextureType::ColorArrayCube;
-            default:
-                throw Shader::NotImplementedException("Unknown texture type");
-            }
-        }()};
-        texture_types.emplace(raw, result);
-        return result;
-    }
-
-    Tegra::MemoryManager* gpu_memory{};
-    GPUVAddr program_base{};
-
-    std::vector<u64> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-
-    u32 local_memory_size{};
-    u32 texture_bound{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
-
-    u32 read_lowest = std::numeric_limits<u32>::max();
-    u32 read_highest = 0;
-
-    u32 cached_lowest = std::numeric_limits<u32>::max();
-    u32 cached_highest = 0;
-
-    bool has_unbound_instructions = false;
-};
-
 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
 using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
 
-// TODO: Move this to a separate file
-constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
-constexpr u32 CACHE_VERSION{2};
-
-class GraphicsEnvironment final : public GenericEnvironment {
-public:
-    explicit GraphicsEnvironment() = default;
-    explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
-                                 Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
-                                 GPUVAddr program_base_, u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
-        gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
-        switch (program) {
-        case Maxwell::ShaderProgram::VertexA:
-            stage = Shader::Stage::VertexA;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::VertexB:
-            stage = Shader::Stage::VertexB;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::TesselationControl:
-            stage = Shader::Stage::TessellationControl;
-            stage_index = 1;
-            break;
-        case Maxwell::ShaderProgram::TesselationEval:
-            stage = Shader::Stage::TessellationEval;
-            stage_index = 2;
-            break;
-        case Maxwell::ShaderProgram::Geometry:
-            stage = Shader::Stage::Geometry;
-            stage_index = 3;
-            break;
-        case Maxwell::ShaderProgram::Fragment:
-            stage = Shader::Stage::Fragment;
-            stage_index = 4;
-            break;
-        default:
-            UNREACHABLE_MSG("Invalid program={}", program);
-            break;
-        }
-        const u64 local_size{sph.LocalMemorySize()};
-        ASSERT(local_size <= std::numeric_limits<u32>::max());
-        local_memory_size = static_cast<u32>(local_size);
-        texture_bound = maxwell3d->regs.tex_cb_index;
-    }
-
-    ~GraphicsEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
-        ASSERT(cbuf.enabled);
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{maxwell3d->regs};
-        const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
-    }
-
-private:
-    Tegra::Engines::Maxwell3D* maxwell3d{};
-    size_t stage_index{};
-};
-
-class ComputeEnvironment final : public GenericEnvironment {
-public:
-    explicit ComputeEnvironment() = default;
-    explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
-                                Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
-                                                                              &kepler_compute_} {
-        const auto& qmd{kepler_compute->launch_description};
-        stage = Shader::Stage::Compute;
-        local_memory_size = qmd.local_pos_alloc;
-        texture_bound = kepler_compute->regs.tex_cb_index;
-        shared_memory_size = qmd.shared_alloc;
-        workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
-    }
-
-    ~ComputeEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& qmd{kepler_compute->launch_description};
-        ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
-        const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{kepler_compute->regs};
-        const auto& qmd{kepler_compute->launch_description};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
-    }
-
-private:
-    Tegra::Engines::KeplerCompute* kepler_compute{};
-};
-
-void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
-                       std::ofstream& file) {
-    if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
-        return;
-    }
-    const u32 num_envs{static_cast<u32>(envs.size())};
-    file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
-    for (const GenericEnvironment* const env : envs) {
-        env->Serialize(file);
-    }
-    file.write(key.data(), key.size_bytes());
-}
-
-template <typename Key, typename Envs>
-void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) {
-    try {
-        std::ofstream file;
-        file.exceptions(std::ifstream::failbit);
-        Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
-        if (!file.is_open()) {
-            LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
-            return;
-        }
-        if (file.tellp() == 0) {
-            file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
-                .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
-        }
-        const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
-        SerializePipeline(key_span, MakeSpan(envs), file);
-
-    } catch (const std::ios_base::failure& e) {
-        LOG_ERROR(Common_Filesystem, "{}", e.what());
-        if (!Common::FS::Delete(filename)) {
-            LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename);
-        }
-    }
+template <typename Container>
+auto MakeSpan(Container& container) {
+    return std::span(container.data(), container.size());
 }
 
-class FileEnvironment final : public Shader::Environment {
-public:
-    void Deserialize(std::ifstream& file) {
-        u64 code_size{};
-        u64 num_texture_types{};
-        u64 num_cbuf_values{};
-        file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
-            .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
-            .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
-            .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
-            .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
-            .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
-            .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
-            .read(reinterpret_cast<char*>(&stage), sizeof(stage));
-        code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
-        file.read(reinterpret_cast<char*>(code.get()), code_size);
-        for (size_t i = 0; i < num_texture_types; ++i) {
-            u32 key;
-            Shader::TextureType type;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&type), sizeof(type));
-            texture_types.emplace(key, type);
-        }
-        for (size_t i = 0; i < num_cbuf_values; ++i) {
-            u64 key;
-            u32 value;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&value), sizeof(value));
-            cbuf_values.emplace(key, value);
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
-                .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
-        } else {
-            file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
-        }
-    }
-
-    u64 ReadInstruction(u32 address) override {
-        if (address < read_lowest || address > read_highest) {
-            throw Shader::LogicError("Out of bounds address {}", address);
-        }
-        return code[(address - read_lowest) / sizeof(u64)];
-    }
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
-        if (it == cbuf_values.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto it{texture_types.find(handle)};
-        if (it == texture_types.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    u32 LocalMemorySize() const override {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const override {
-        return shared_memory_size;
-    }
-
-    u32 TextureBoundBuffer() const override {
-        return texture_bound;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const override {
-        return workgroup_size;
-    }
-
-private:
-    std::unique_ptr<u64[]> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-    std::array<u32, 3> workgroup_size{};
-    u32 local_memory_size{};
-    u32 shared_memory_size{};
-    u32 texture_bound{};
-    u32 read_lowest{};
-    u32 read_highest{};
-};
-
 Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
     switch (comparison) {
     case Maxwell::ComparisonOp::Never:
@@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
 }
 } // Anonymous namespace
 
-void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
-                                      const VideoCore::DiskResourceLoadCallback& callback) {
-    if (title_id == 0) {
-        return;
-    }
-    std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)};
-    std::string base_dir{shader_dir + "/vulkan"};
-    std::string transferable_dir{base_dir + "/transferable"};
-    std::string precompiled_dir{base_dir + "/precompiled"};
-    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
-        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
-        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
-        return;
-    }
-    pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);
-
-    struct {
-        std::mutex mutex;
-        size_t total{0};
-        size_t built{0};
-        bool has_loaded{false};
-    } state;
-
-    std::ifstream file;
-    Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate);
-    if (!file.is_open()) {
-        return;
-    }
-    file.exceptions(std::ifstream::failbit);
-    const auto end{file.tellg()};
-    file.seekg(0, std::ios::beg);
-
-    std::array<char, 8> magic_number;
-    u32 cache_version;
-    file.read(magic_number.data(), magic_number.size())
-        .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
-    if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
-        file.close();
-        if (Common::FS::Delete(pipeline_cache_filename)) {
-            if (magic_number != MAGIC_NUMBER) {
-                LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
-            }
-            if (cache_version != CACHE_VERSION) {
-                LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
-            }
-        } else {
-            LOG_ERROR(Render_Vulkan,
-                      "Invalid pipeline cache file and failed to delete it in \"{}\"",
-                      pipeline_cache_filename);
-        }
-        return;
-    }
-    while (file.tellg() != end) {
-        if (stop_loading) {
-            return;
-        }
-        u32 num_envs{};
-        file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
-        std::vector<FileEnvironment> envs(num_envs);
-        for (FileEnvironment& env : envs) {
-            env.Deserialize(file);
-        }
-        if (envs.front().ShaderStage() == Shader::Stage::Compute) {
-            ComputePipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
-
-                std::lock_guard lock{state.mutex};
-                compute_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        } else {
-            GraphicsPipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
-                for (auto& env : envs) {
-                    env_ptrs.push_back(&env);
-                }
-                auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
-
-                std::lock_guard lock{state.mutex};
-                graphics_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        }
-        ++state.total;
-    }
-    {
-        std::lock_guard lock{state.mutex};
-        callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
-        state.has_loaded = true;
-    }
-    workers.WaitForRequests();
-}
-
 size_t ComputePipelineCacheKey::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
     return static_cast<size_t>(hash);
@@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
     return std::memcmp(&rhs, this, Size()) == 0;
 }
 
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
-                             Tegra::Engines::Maxwell3D& maxwell3d_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_,
                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                              TextureCache& texture_cache_)
-    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
-      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
       update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
       buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
       workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
@@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default;
 GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    if (!RefreshStages()) {
+    if (!RefreshStages(graphics_key.unique_hashes)) {
         current_pipeline = nullptr;
         return nullptr;
     }
@@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
 ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-    if (!cpu_shader_addr) {
-        return nullptr;
-    }
-    const ShaderInfo* shader{TryGet(*cpu_shader_addr)};
+    const ShaderInfo* const shader{ComputeShader()};
     if (!shader) {
-        ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
-        shader = MakeShaderInfo(env, *cpu_shader_addr);
+        return nullptr;
     }
+    const auto& qmd{kepler_compute.launch_description};
     const ComputePipelineCacheKey key{
-        .unique_hash{shader->unique_hash},
-        .shared_memory_size{qmd.shared_alloc},
+        .unique_hash = shader->unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
         .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
     };
     const auto [pair, is_new]{compute_cache.try_emplace(key)};
@@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
     return pipeline.get();
 }
 
-bool PipelineCache::RefreshStages() {
-    auto& dirty{maxwell3d.dirty.flags};
-    if (!dirty[VideoCommon::Dirty::Shaders]) {
-        return last_valid_shaders;
-    }
-    dirty[VideoCommon::Dirty::Shaders] = false;
-
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
-            graphics_key.unique_hashes[index] = u128{};
-            continue;
-        }
-        const auto& shader_config{maxwell3d.regs.shader_config[index]};
-        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-        if (!cpu_shader_addr) {
-            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-            last_valid_shaders = false;
-            return false;
-        }
-        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
-        if (!shader_info) {
-            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
-            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
-        }
-        shader_infos[index] = shader_info;
-        graphics_key.unique_hashes[index] = shader_info->unique_hash;
-    }
-    last_valid_shaders = true;
-    return true;
-}
-
-const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
-    auto info = std::make_unique<ShaderInfo>();
-    if (const std::optional<u128> cached_hash{env.Analyze()}) {
-        info->unique_hash = *cached_hash;
-        info->size_bytes = env.CachedSize();
-    } else {
-        // Slow path, not really hit on commercial games
-        // Build a control flow graph to get the real shader size
-        main_pools.flow_block.ReleaseContents();
-        Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()};
-        info->unique_hash = env.CalculateHash();
-        info->size_bytes = env.ReadSize();
-    }
-    const size_t size_bytes{info->size_bytes};
-    const ShaderInfo* const result{info.get()};
-    Register(std::move(info), cpu_addr, size_bytes);
-    return result;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    if (title_id == 0) {
+        return;
+    }
+    auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+    auto base_dir{shader_dir / "vulkan"};
+    auto transferable_dir{base_dir / "transferable"};
+    auto precompiled_dir{base_dir / "precompiled"};
+    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
+        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
+        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+        return;
+    }
+    pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id);
+
+    struct {
+        std::mutex mutex;
+        size_t total{0};
+        size_t built{0};
+        bool has_loaded{false};
+    } state;
+
+    const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+        ComputePipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+            ShaderPools pools;
+            auto pipeline{CreateComputePipeline(pools, key, env, false)};
+
+            std::lock_guard lock{state.mutex};
+            compute_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+        GraphicsPipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+            ShaderPools pools;
+            boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+            for (auto& env : envs) {
+                env_ptrs.push_back(&env);
+            }
+            auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
+
+            std::lock_guard lock{state.mutex};
+            graphics_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);
+
+    std::unique_lock lock{state.mutex};
+    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+    state.has_loaded = true;
+    lock.unlock();
+
+    workers.WaitForRequests();
 }
 
 std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
@@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         Shader::Environment& env{*envs[env_index]};
@@ -830,7 +303,7 @@
 
     u32 binding{0};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         UNIMPLEMENTED_IF(index == 0);
@@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
-            const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0],
-                                               key.unique_hashes[index][1])};
+            const std::string name{fmt::format("{:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
         }
     }
@@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
 
     const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (graphics_key.unique_hashes[index] == u128{}) {
+        if (graphics_key.unique_hashes[index] == 0) {
             continue;
         }
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         const u32 start_address{maxwell3d.regs.shader_config[index].offset};
         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
         env.SetCachedSize(shader_infos[index]->size_bytes);
-
         envs.push_back(&env);
     }
     auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
@@ -882,11 +353,11 @@
             boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
                 env_ptrs;
             for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-                if (key.unique_hashes[index] != u128{}) {
+                if (key.unique_hashes[index] != 0) {
                     env_ptrs.push_back(&envs[index]);
                 }
             }
-            SerializePipeline(key, env_ptrs, pipeline_cache_filename);
+            VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename);
         });
     return pipeline;
 }
@@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
     if (!pipeline_cache_filename.empty()) {
         serialization_thread.QueueWork([this, key, env = std::move(env)] {
-            SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
-                              pipeline_cache_filename);
+            VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+                                           pipeline_cache_filename);
         });
     }
     return pipeline;
@@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {
-        const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
+        const auto name{fmt::format("{:016x}", key.unique_hash)};
         spv_module.SetObjectNameEXT(name.c_str());
     }
     Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
@@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
     Shader::Profile profile{base_profile};
 
     const Shader::Stage stage{program.stage};
-    const bool has_geometry{key.unique_hashes[4] != u128{}};
+    const bool has_geometry{key.unique_hashes[4] != 0};
     const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
     const float point_size{Common::BitCast<float>(key.state.point_size)};
     switch (stage) {
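LoadDiskResources now hands the file walking to VideoCommon::LoadPipelines and only supplies the two build callbacks. A sketch of the declaration this implies for video_core/shader_environment.h (the exact callable wrapper is an assumption inferred from the load_compute/load_graphics lambdas at the call site; Common::UniqueFunction is used here as a stand-in for a move-only function type):

    // Presumed shape, not verbatim from this diff:
    namespace VideoCommon {
    void LoadPipelines(
        std::stop_token stop_loading, const std::filesystem::path& filename,
        Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
        Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
    } // namespace VideoCommon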
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eec17d3fd..4e48b4956 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <filesystem>
 #include <iosfwd>
 #include <memory>
 #include <type_traits>
@@ -42,7 +43,7 @@ namespace Vulkan {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    u128 unique_hash;
+    u64 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
 
@@ -76,16 +77,12 @@ namespace Vulkan {
 class ComputePipeline;
 class Device;
 class DescriptorPool;
-class GenericEnvironment;
 class RasterizerVulkan;
 class RenderPassCache;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-struct ShaderInfo {
-    u128 unique_hash{};
-    size_t size_bytes{};
-};
+using VideoCommon::ShaderInfo;
 
 struct ShaderPools {
     void ReleaseContents() {
@@ -99,17 +96,16 @@ struct ShaderPools {
     Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
+class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
-                           Tegra::Engines::Maxwell3D& maxwell3d,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
                            Tegra::Engines::KeplerCompute& kepler_compute,
                           Tegra::MemoryManager& gpu_memory, const Device& device,
                           VKScheduler& scheduler, DescriptorPool& descriptor_pool,
                           VKUpdateDescriptorQueue& update_descriptor_queue,
                           RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                           TextureCache& texture_cache);
-    ~PipelineCache() override;
+    ~PipelineCache();
 
     [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
 
@@ -119,10 +115,6 @@
                            const VideoCore::DiskResourceLoadCallback& callback);
 
 private:
-    bool RefreshStages();
-
-    const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
-
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
 
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
@@ -140,11 +132,6 @@
     Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
                                 const Shader::IR::Program& program);
 
-    Tegra::GPU& gpu;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-
     const Device& device;
     VKScheduler& scheduler;
     DescriptorPool& descriptor_pool;
@@ -156,16 +143,13 @@
     GraphicsPipelineCacheKey graphics_key{};
     GraphicsPipeline* current_pipeline{};
 
-    std::array<const ShaderInfo*, 6> shader_infos{};
-    bool last_valid_shaders{};
-
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
 
     ShaderPools main_pools;
 
     Shader::Profile base_profile;
-    std::string pipeline_cache_filename;
+    std::filesystem::path pipeline_cache_filename;
 
     Common::ThreadWorker workers;
     Common::ThreadWorker serialization_thread;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 149 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | 149 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, |
| 150 | update_descriptor_queue, descriptor_pool), | 150 | update_descriptor_queue, descriptor_pool), |
| 151 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 151 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 152 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 152 | pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 153 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, | 153 | descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, |
| 154 | texture_cache), | 154 | texture_cache), |
| 155 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, | 155 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..b8b8eace5 --- /dev/null +++ b/src/video_core/shader_cache.cpp | |||
| @@ -0,0 +1,233 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/object_pool.h" | ||
| 12 | #include "video_core/dirty_flags.h" | ||
| 13 | #include "video_core/engines/kepler_compute.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/memory_manager.h" | ||
| 16 | #include "video_core/shader_cache.h" | ||
| 17 | #include "video_core/shader_environment.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | ||
| 22 | std::scoped_lock lock{invalidation_mutex}; | ||
| 23 | InvalidatePagesInRegion(addr, size); | ||
| 24 | RemovePendingShaders(); | ||
| 25 | } | ||
| 26 | |||
| 27 | void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | ||
| 28 | std::scoped_lock lock{invalidation_mutex}; | ||
| 29 | InvalidatePagesInRegion(addr, size); | ||
| 30 | } | ||
| 31 | |||
| 32 | void ShaderCache::SyncGuestHost() { | ||
| 33 | std::scoped_lock lock{invalidation_mutex}; | ||
| 34 | RemovePendingShaders(); | ||
| 35 | } | ||
| 36 | |||
| 37 | ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 38 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 39 | Tegra::Engines::KeplerCompute& kepler_compute_) | ||
| 40 | : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||
| 41 | rasterizer{rasterizer_} {} | ||
| 42 | |||
| 43 | bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { | ||
| 44 | auto& dirty{maxwell3d.dirty.flags}; | ||
| 45 | if (!dirty[VideoCommon::Dirty::Shaders]) { | ||
| 46 | return last_shaders_valid; | ||
| 47 | } | ||
| 48 | dirty[VideoCommon::Dirty::Shaders] = false; | ||
| 49 | |||
| 50 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 51 | for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { | ||
| 52 | if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { | ||
| 53 | unique_hashes[index] = 0; | ||
| 54 | continue; | ||
| 55 | } | ||
| 56 | const auto& shader_config{maxwell3d.regs.shader_config[index]}; | ||
| 57 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | ||
| 58 | const GPUVAddr shader_addr{base_addr + shader_config.offset}; | ||
| 59 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 60 | if (!cpu_shader_addr) { | ||
| 61 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 62 | last_shaders_valid = false; | ||
| 63 | return false; | ||
| 64 | } | ||
| 65 | const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; | ||
| 66 | if (!shader_info) { | ||
| 67 | const u32 start_address{shader_config.offset}; | ||
| 68 | GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 69 | shader_info = MakeShaderInfo(env, *cpu_shader_addr); | ||
| 70 | } | ||
| 71 | shader_infos[index] = shader_info; | ||
| 72 | unique_hashes[index] = shader_info->unique_hash; | ||
| 73 | } | ||
| 74 | last_shaders_valid = true; | ||
| 75 | return true; | ||
| 76 | } | ||
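RefreshStages is the hook backends call before assembling a graphics pipeline key: it fills one hash per enabled stage and reuses the previous result until the Shaders dirty flag is raised again. A hypothetical caller, sketched with placeholder names (Backend, LookupOrBuildPipeline, graphics_key are illustrative, not part of this commit):

    // Sketch only; Backend and LookupOrBuildPipeline are illustrative names.
    GraphicsPipeline* Backend::CurrentGraphicsPipeline() {
        std::array<u64, 6> unique_hashes{};
        if (!RefreshStages(unique_hashes)) {
            return nullptr; // a stage points at unmapped GPU memory
        }
        graphics_key.unique_hashes = unique_hashes; // feeds the cache key
        return LookupOrBuildPipeline(graphics_key);
    }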
| 77 | |||
| 78 | const ShaderInfo* ShaderCache::ComputeShader() { | ||
| 79 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | ||
| 80 | const auto& qmd{kepler_compute.launch_description}; | ||
| 81 | const GPUVAddr shader_addr{program_base + qmd.program_start}; | ||
| 82 | const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; | ||
| 83 | if (!cpu_shader_addr) { | ||
| 84 | LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); | ||
| 85 | return nullptr; | ||
| 86 | } | ||
| 87 | if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { | ||
| 88 | return shader; | ||
| 89 | } | ||
| 90 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | ||
| 91 | return MakeShaderInfo(env, *cpu_shader_addr); | ||
| 92 | } | ||
| 93 | |||
| 94 | ShaderInfo* ShaderCache::TryGet(VAddr addr) const { | ||
| 95 | std::scoped_lock lock{lookup_mutex}; | ||
| 96 | |||
| 97 | const auto it = lookup_cache.find(addr); | ||
| 98 | if (it == lookup_cache.end()) { | ||
| 99 | return nullptr; | ||
| 100 | } | ||
| 101 | return it->second->data; | ||
| 102 | } | ||
| 103 | |||
| 104 | void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) { | ||
| 105 | std::scoped_lock lock{invalidation_mutex, lookup_mutex}; | ||
| 106 | |||
| 107 | const VAddr addr_end = addr + size; | ||
| 108 | Entry* const entry = NewEntry(addr, addr_end, data.get()); | ||
| 109 | |||
| 110 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 111 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 112 | invalidation_cache[page].push_back(entry); | ||
| 113 | } | ||
| 114 | |||
| 115 | storage.push_back(std::move(data)); | ||
| 116 | |||
| 117 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | ||
| 118 | } | ||
| 119 | |||
| 120 | void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { | ||
| 121 | const VAddr addr_end = addr + size; | ||
| 122 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 123 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 124 | auto it = invalidation_cache.find(page); | ||
| 125 | if (it == invalidation_cache.end()) { | ||
| 126 | continue; | ||
| 127 | } | ||
| 128 | InvalidatePageEntries(it->second, addr, addr_end); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void ShaderCache::RemovePendingShaders() { | ||
| 133 | if (marked_for_removal.empty()) { | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | // Remove duplicates | ||
| 137 | std::ranges::sort(marked_for_removal); | ||
| 138 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | ||
| 139 | marked_for_removal.end()); | ||
| 140 | |||
| 141 | std::vector<ShaderInfo*> removed_shaders; | ||
| 142 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 143 | |||
| 144 | std::scoped_lock lock{lookup_mutex}; | ||
| 145 | |||
| 146 | for (Entry* const entry : marked_for_removal) { | ||
| 147 | removed_shaders.push_back(entry->data); | ||
| 148 | |||
| 149 | const auto it = lookup_cache.find(entry->addr_start); | ||
| 150 | ASSERT(it != lookup_cache.end()); | ||
| 151 | lookup_cache.erase(it); | ||
| 152 | } | ||
| 153 | marked_for_removal.clear(); | ||
| 154 | |||
| 155 | if (!removed_shaders.empty()) { | ||
| 156 | RemoveShadersFromStorage(std::move(removed_shaders)); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { | ||
| 161 | size_t index = 0; | ||
| 162 | while (index < entries.size()) { | ||
| 163 | Entry* const entry = entries[index]; | ||
| 164 | if (!entry->Overlaps(addr, addr_end)) { | ||
| 165 | ++index; | ||
| 166 | continue; | ||
| 167 | } | ||
| 168 | |||
| 169 | UnmarkMemory(entry); | ||
| 170 | RemoveEntryFromInvalidationCache(entry); | ||
| 171 | marked_for_removal.push_back(entry); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { | ||
| 176 | const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 177 | for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { | ||
| 178 | const auto entries_it = invalidation_cache.find(page); | ||
| 179 | ASSERT(entries_it != invalidation_cache.end()); | ||
| 180 | std::vector<Entry*>& entries = entries_it->second; | ||
| 181 | |||
| 182 | const auto entry_it = std::ranges::find(entries, entry); | ||
| 183 | ASSERT(entry_it != entries.end()); | ||
| 184 | entries.erase(entry_it); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | |||
| 188 | void ShaderCache::UnmarkMemory(Entry* entry) { | ||
| 189 | if (!entry->is_memory_marked) { | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | entry->is_memory_marked = false; | ||
| 193 | |||
| 194 | const VAddr addr = entry->addr_start; | ||
| 195 | const size_t size = entry->addr_end - addr; | ||
| 196 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||
| 197 | } | ||
| 198 | |||
| 199 | void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { | ||
| 200 | // Remove them from the cache | ||
| 201 | std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { | ||
| 202 | return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); | ||
| 203 | }); | ||
| 204 | } | ||
| 205 | |||
| 206 | ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { | ||
| 207 | auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); | ||
| 208 | Entry* const entry_pointer = entry.get(); | ||
| 209 | |||
| 210 | lookup_cache.emplace(addr, std::move(entry)); | ||
| 211 | return entry_pointer; | ||
| 212 | } | ||
| 213 | |||
| 214 | const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { | ||
| 215 | auto info = std::make_unique<ShaderInfo>(); | ||
| 216 | if (const std::optional<u64> cached_hash{env.Analyze()}) { | ||
| 217 | info->unique_hash = *cached_hash; | ||
| 218 | info->size_bytes = env.CachedSize(); | ||
| 219 | } else { | ||
| 220 | // Slow path, rarely hit in commercial games | ||
| 221 | // Build a control flow graph to get the real shader size | ||
| 222 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | ||
| 223 | Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; | ||
| 224 | info->unique_hash = env.CalculateHash(); | ||
| 225 | info->size_bytes = env.ReadSize(); | ||
| 226 | } | ||
| 227 | const size_t size_bytes{info->size_bytes}; | ||
| 228 | const ShaderInfo* const result{info.get()}; | ||
| 229 | Register(std::move(info), cpu_addr, size_bytes); | ||
| 230 | return result; | ||
| 231 | } | ||
| 232 | |||
| 233 | } // namespace VideoCommon | ||
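Register and InvalidatePagesInRegion above index entries at 16 KiB granularity (PAGE_BITS = 14), rounding the end address up so a partially covered trailing page is still tracked. A minimal self-contained sketch of that arithmetic, using made-up example values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint64_t PAGE_BITS = 14; // 16 KiB pages, as in shader_cache.h
        constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;
        const std::uint64_t addr = 0x12345; // example shader start address
        const std::uint64_t size = 0x9000;  // example shader size in bytes
        const std::uint64_t addr_end = addr + size;
        // Round up: the last page is included even if only partially covered.
        const std::uint64_t page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end; ++page) {
            std::printf("entry indexed under page 0x%llx\n",
                        static_cast<unsigned long long>(page));
        }
        return 0;
    }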
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -4,20 +4,28 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <mutex> | 8 | #include <mutex> |
| 10 | #include <unordered_map> | 9 | #include <unordered_map> |
| 11 | #include <utility> | 10 | #include <utility> |
| 12 | #include <vector> | 11 | #include <vector> |
| 13 | 12 | ||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 16 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 17 | 15 | ||
| 16 | namespace Tegra { | ||
| 17 | class MemoryManager; | ||
| 18 | } | ||
| 19 | |||
| 18 | namespace VideoCommon { | 20 | namespace VideoCommon { |
| 19 | 21 | ||
| 20 | template <class T> | 22 | class GenericEnvironment; |
| 23 | |||
| 24 | struct ShaderInfo { | ||
| 25 | u64 unique_hash{}; | ||
| 26 | size_t size_bytes{}; | ||
| 27 | }; | ||
| 28 | |||
| 21 | class ShaderCache { | 29 | class ShaderCache { |
| 22 | static constexpr u64 PAGE_BITS = 14; | 30 | static constexpr u64 PAGE_BITS = 14; |
| 23 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; | 31 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; |
| @@ -25,206 +33,100 @@ class ShaderCache { | |||
| 25 | struct Entry { | 33 | struct Entry { |
| 26 | VAddr addr_start; | 34 | VAddr addr_start; |
| 27 | VAddr addr_end; | 35 | VAddr addr_end; |
| 28 | T* data; | 36 | ShaderInfo* data; |
| 29 | 37 | ||
| 30 | bool is_memory_marked = true; | 38 | bool is_memory_marked = true; |
| 31 | 39 | ||
| 32 | constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { | 40 | bool Overlaps(VAddr start, VAddr end) const noexcept { |
| 33 | return start < addr_end && addr_start < end; | 41 | return start < addr_end && addr_start < end; |
| 34 | } | 42 | } |
| 35 | }; | 43 | }; |
| 36 | 44 | ||
| 37 | public: | 45 | public: |
| 38 | virtual ~ShaderCache() = default; | ||
| 39 | |||
| 40 | /// @brief Removes shaders inside a given region | 46 | /// @brief Removes shaders inside a given region |
| 41 | /// @note Checks for ranges | 47 | /// @note Checks for ranges |
| 42 | /// @param addr Start address of the invalidation | 48 | /// @param addr Start address of the invalidation |
| 43 | /// @param size Number of bytes of the invalidation | 49 | /// @param size Number of bytes of the invalidation |
| 44 | void InvalidateRegion(VAddr addr, std::size_t size) { | 50 | void InvalidateRegion(VAddr addr, size_t size); |
| 45 | std::scoped_lock lock{invalidation_mutex}; | ||
| 46 | InvalidatePagesInRegion(addr, size); | ||
| 47 | RemovePendingShaders(); | ||
| 48 | } | ||
| 49 | 51 | ||
| 50 | /// @brief Unmarks a memory region as cached and marks it for removal | 52 | /// @brief Unmarks a memory region as cached and marks it for removal |
| 51 | /// @param addr Start address of the CPU write operation | 53 | /// @param addr Start address of the CPU write operation |
| 52 | /// @param size Number of bytes of the CPU write operation | 54 | /// @param size Number of bytes of the CPU write operation |
| 53 | void OnCPUWrite(VAddr addr, std::size_t size) { | 55 | void OnCPUWrite(VAddr addr, size_t size); |
| 54 | std::lock_guard lock{invalidation_mutex}; | ||
| 55 | InvalidatePagesInRegion(addr, size); | ||
| 56 | } | ||
| 57 | 56 | ||
| 58 | /// @brief Flushes delayed removal operations | 57 | /// @brief Flushes delayed removal operations |
| 59 | void SyncGuestHost() { | 58 | void SyncGuestHost(); |
| 60 | std::scoped_lock lock{invalidation_mutex}; | 59 | |
| 61 | RemovePendingShaders(); | 60 | protected: |
| 62 | } | 61 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, |
| 62 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 63 | Tegra::Engines::KeplerCompute& kepler_compute_); | ||
| 64 | |||
| 65 | /// @brief Update the hashes and information of shader stages | ||
| 66 | /// @param unique_hashes Shader hashes to store into when a stage is enabled | ||
| 67 | /// @return True on success, false on error | ||
| 68 | bool RefreshStages(std::array<u64, 6>& unique_hashes); | ||
| 69 | |||
| 70 | /// @brief Returns information about the current compute shader | ||
| 71 | /// @return Pointer to a valid shader, nullptr on error | ||
| 72 | const ShaderInfo* ComputeShader(); | ||
| 73 | |||
| 74 | Tegra::MemoryManager& gpu_memory; | ||
| 75 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 76 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 63 | 77 | ||
| 78 | std::array<const ShaderInfo*, 6> shader_infos{}; | ||
| 79 | bool last_shaders_valid = false; | ||
| 80 | |||
| 81 | private: | ||
| 64 | /// @brief Tries to obtain a cached shader starting in a given address | 82 | /// @brief Tries to obtain a cached shader starting in a given address |
| 65 | /// @note Doesn't check for ranges, the given address has to be the start of the shader | 83 | /// @note Doesn't check for ranges, the given address has to be the start of the shader |
| 66 | /// @param addr Start address of the shader, this doesn't cache for region | 84 | /// @param addr Start address of the shader, this doesn't cache for region |
| 67 | /// @return Pointer to a valid shader, nullptr when nothing is found | 85 | /// @return Pointer to a valid shader, nullptr when nothing is found |
| 68 | T* TryGet(VAddr addr) const { | 86 | ShaderInfo* TryGet(VAddr addr) const; |
| 69 | std::scoped_lock lock{lookup_mutex}; | ||
| 70 | |||
| 71 | const auto it = lookup_cache.find(addr); | ||
| 72 | if (it == lookup_cache.end()) { | ||
| 73 | return nullptr; | ||
| 74 | } | ||
| 75 | return it->second->data; | ||
| 76 | } | ||
| 77 | |||
| 78 | protected: | ||
| 79 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | ||
| 80 | 87 | ||
| 81 | /// @brief Register in the cache a given entry | 88 | /// @brief Register in the cache a given entry |
| 82 | /// @param data Shader to store in the cache | 89 | /// @param data Shader to store in the cache |
| 83 | /// @param addr Start address of the shader that will be registered | 90 | /// @param addr Start address of the shader that will be registered |
| 84 | /// @param size Size in bytes of the shader | 91 | /// @param size Size in bytes of the shader |
| 85 | void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { | 92 | void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size); |
| 86 | std::scoped_lock lock{invalidation_mutex, lookup_mutex}; | ||
| 87 | |||
| 88 | const VAddr addr_end = addr + size; | ||
| 89 | Entry* const entry = NewEntry(addr, addr_end, data.get()); | ||
| 90 | |||
| 91 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 92 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 93 | invalidation_cache[page].push_back(entry); | ||
| 94 | } | ||
| 95 | |||
| 96 | storage.push_back(std::move(data)); | ||
| 97 | 93 | ||
| 98 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | ||
| 99 | } | ||
| 100 | |||
| 101 | /// @brief Called when a shader is going to be removed | ||
| 102 | /// @param shader Shader that will be removed | ||
| 103 | /// @pre invalidation_cache is locked | ||
| 104 | /// @pre lookup_mutex is locked | ||
| 105 | virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} | ||
| 106 | |||
| 107 | private: | ||
| 108 | /// @brief Invalidate pages in a given region | 94 | /// @brief Invalidate pages in a given region |
| 109 | /// @pre invalidation_mutex is locked | 95 | /// @pre invalidation_mutex is locked |
| 110 | void InvalidatePagesInRegion(VAddr addr, std::size_t size) { | 96 | void InvalidatePagesInRegion(VAddr addr, size_t size); |
| 111 | const VAddr addr_end = addr + size; | ||
| 112 | const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 113 | for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { | ||
| 114 | auto it = invalidation_cache.find(page); | ||
| 115 | if (it == invalidation_cache.end()) { | ||
| 116 | continue; | ||
| 117 | } | ||
| 118 | InvalidatePageEntries(it->second, addr, addr_end); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | 97 | ||
| 122 | /// @brief Remove shaders marked for deletion | 98 | /// @brief Remove shaders marked for deletion |
| 123 | /// @pre invalidation_mutex is locked | 99 | /// @pre invalidation_mutex is locked |
| 124 | void RemovePendingShaders() { | 100 | void RemovePendingShaders(); |
| 125 | if (marked_for_removal.empty()) { | ||
| 126 | return; | ||
| 127 | } | ||
| 128 | // Remove duplicates | ||
| 129 | std::sort(marked_for_removal.begin(), marked_for_removal.end()); | ||
| 130 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | ||
| 131 | marked_for_removal.end()); | ||
| 132 | |||
| 133 | std::vector<T*> removed_shaders; | ||
| 134 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 135 | |||
| 136 | std::scoped_lock lock{lookup_mutex}; | ||
| 137 | |||
| 138 | for (Entry* const entry : marked_for_removal) { | ||
| 139 | removed_shaders.push_back(entry->data); | ||
| 140 | |||
| 141 | const auto it = lookup_cache.find(entry->addr_start); | ||
| 142 | ASSERT(it != lookup_cache.end()); | ||
| 143 | lookup_cache.erase(it); | ||
| 144 | } | ||
| 145 | marked_for_removal.clear(); | ||
| 146 | |||
| 147 | if (!removed_shaders.empty()) { | ||
| 148 | RemoveShadersFromStorage(std::move(removed_shaders)); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | 101 | ||
| 152 | /// @brief Invalidates entries in a given range for the passed page | 102 | /// @brief Invalidates entries in a given range for the passed page |
| 153 | /// @param entries Vector of entries in the page, it will be modified on overlaps | 103 | /// @param entries Vector of entries in the page, it will be modified on overlaps |
| 154 | /// @param addr Start address of the invalidation | 104 | /// @param addr Start address of the invalidation |
| 155 | /// @param addr_end Non-inclusive end address of the invalidation | 105 | /// @param addr_end Non-inclusive end address of the invalidation |
| 156 | /// @pre invalidation_mutex is locked | 106 | /// @pre invalidation_mutex is locked |
| 157 | void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { | 107 | void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end); |
| 158 | std::size_t index = 0; | ||
| 159 | while (index < entries.size()) { | ||
| 160 | Entry* const entry = entries[index]; | ||
| 161 | if (!entry->Overlaps(addr, addr_end)) { | ||
| 162 | ++index; | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | |||
| 166 | UnmarkMemory(entry); | ||
| 167 | RemoveEntryFromInvalidationCache(entry); | ||
| 168 | marked_for_removal.push_back(entry); | ||
| 169 | } | ||
| 170 | } | ||
| 171 | 108 | ||
| 172 | /// @brief Removes all references to an entry in the invalidation cache | 109 | /// @brief Removes all references to an entry in the invalidation cache |
| 173 | /// @param entry Entry to remove from the invalidation cache | 110 | /// @param entry Entry to remove from the invalidation cache |
| 174 | /// @pre invalidation_mutex is locked | 111 | /// @pre invalidation_mutex is locked |
| 175 | void RemoveEntryFromInvalidationCache(const Entry* entry) { | 112 | void RemoveEntryFromInvalidationCache(const Entry* entry); |
| 176 | const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; | ||
| 177 | for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { | ||
| 178 | const auto entries_it = invalidation_cache.find(page); | ||
| 179 | ASSERT(entries_it != invalidation_cache.end()); | ||
| 180 | std::vector<Entry*>& entries = entries_it->second; | ||
| 181 | |||
| 182 | const auto entry_it = std::find(entries.begin(), entries.end(), entry); | ||
| 183 | ASSERT(entry_it != entries.end()); | ||
| 184 | entries.erase(entry_it); | ||
| 185 | } | ||
| 186 | } | ||
| 187 | 113 | ||
| 188 | /// @brief Unmarks an entry from the rasterizer cache | 114 | /// @brief Unmarks an entry from the rasterizer cache |
| 189 | /// @param entry Entry to unmark from memory | 115 | /// @param entry Entry to unmark from memory |
| 190 | void UnmarkMemory(Entry* entry) { | 116 | void UnmarkMemory(Entry* entry); |
| 191 | if (!entry->is_memory_marked) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | entry->is_memory_marked = false; | ||
| 195 | |||
| 196 | const VAddr addr = entry->addr_start; | ||
| 197 | const std::size_t size = entry->addr_end - addr; | ||
| 198 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||
| 199 | } | ||
| 200 | 117 | ||
| 201 | /// @brief Removes a vector of shaders from a list | 118 | /// @brief Removes a vector of shaders from a list |
| 202 | /// @param removed_shaders Shaders to be removed from the storage | 119 | /// @param removed_shaders Shaders to be removed from the storage |
| 203 | /// @pre invalidation_mutex is locked | 120 | /// @pre invalidation_mutex is locked |
| 204 | /// @pre lookup_mutex is locked | 121 | /// @pre lookup_mutex is locked |
| 205 | void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { | 122 | void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); |
| 206 | // Notify removals | ||
| 207 | for (T* const shader : removed_shaders) { | ||
| 208 | OnShaderRemoval(shader); | ||
| 209 | } | ||
| 210 | |||
| 211 | // Remove them from the cache | ||
| 212 | const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { | ||
| 213 | return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != | ||
| 214 | removed_shaders.end(); | ||
| 215 | }; | ||
| 216 | std::erase_if(storage, is_removed); | ||
| 217 | } | ||
| 218 | 123 | ||
| 219 | /// @brief Creates a new entry in the lookup cache and returns its pointer | 124 | /// @brief Creates a new entry in the lookup cache and returns its pointer |
| 220 | /// @pre lookup_mutex is locked | 125 | /// @pre lookup_mutex is locked |
| 221 | Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { | 126 | Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); |
| 222 | auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); | ||
| 223 | Entry* const entry_pointer = entry.get(); | ||
| 224 | 127 | ||
| 225 | lookup_cache.emplace(addr, std::move(entry)); | 128 | /// @brief Create a new shader entry and register it |
| 226 | return entry_pointer; | 129 | const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); |
| 227 | } | ||
| 228 | 130 | ||
| 229 | VideoCore::RasterizerInterface& rasterizer; | 131 | VideoCore::RasterizerInterface& rasterizer; |
| 230 | 132 | ||
| @@ -233,7 +135,7 @@ private: | |||
| 233 | 135 | ||
| 234 | std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; | 136 | std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; |
| 235 | std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; | 137 | std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; |
| 236 | std::vector<std::unique_ptr<T>> storage; | 138 | std::vector<std::unique_ptr<ShaderInfo>> storage; |
| 237 | std::vector<Entry*> marked_for_removal; | 139 | std::vector<Entry*> marked_for_removal; |
| 238 | }; | 140 | }; |
| 239 | 141 | ||
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ b/src/video_core/shader_environment.cpp | |||
| @@ -0,0 +1,453 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <filesystem> | ||
| 6 | #include <fstream> | ||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/cityhash.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/div_ceil.h" | ||
| 15 | #include "common/fs/fs.h" | ||
| 16 | #include "common/logging/log.h" | ||
| 17 | #include "shader_recompiler/environment.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/shader_environment.h" | ||
| 20 | #include "video_core/textures/texture.h" | ||
| 21 | |||
| 22 | namespace VideoCommon { | ||
| 23 | |||
| 24 | constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; | ||
| 25 | constexpr u32 CACHE_VERSION = 3; | ||
| 26 | |||
| 27 | constexpr size_t INST_SIZE = sizeof(u64); | ||
| 28 | |||
| 29 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 30 | |||
| 31 | static u64 MakeCbufKey(u32 index, u32 offset) { | ||
| 32 | return (static_cast<u64>(index) << 32) | offset; | ||
| 33 | } | ||
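MakeCbufKey packs the constant buffer index into the upper 32 bits and the byte offset into the lower 32 bits, so distinct (index, offset) pairs never collide. An illustrative compile-time check, with example values not taken from the commit:

    // Example values only: index 3, offset 0x10.
    static_assert(((static_cast<u64>(3) << 32) | 0x10u) == 0x0000000300000010ULL);
    static_assert(static_cast<u32>(0x0000000300000010ULL >> 32) == 3); // index
    static_assert(static_cast<u32>(0x0000000300000010ULL) == 0x10);    // offset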
| 34 | |||
| 35 | static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { | ||
| 36 | switch (entry.texture_type) { | ||
| 37 | case Tegra::Texture::TextureType::Texture1D: | ||
| 38 | return Shader::TextureType::Color1D; | ||
| 39 | case Tegra::Texture::TextureType::Texture2D: | ||
| 40 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 41 | return Shader::TextureType::Color2D; | ||
| 42 | case Tegra::Texture::TextureType::Texture3D: | ||
| 43 | return Shader::TextureType::Color3D; | ||
| 44 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 45 | return Shader::TextureType::ColorCube; | ||
| 46 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 47 | return Shader::TextureType::ColorArray1D; | ||
| 48 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 49 | return Shader::TextureType::ColorArray2D; | ||
| 50 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 51 | return Shader::TextureType::Buffer; | ||
| 52 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 53 | return Shader::TextureType::ColorArrayCube; | ||
| 54 | default: | ||
| 55 | throw Shader::NotImplementedException("Unknown texture type"); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 60 | u32 start_address_) | ||
| 61 | : gpu_memory{&gpu_memory_}, program_base{program_base_} { | ||
| 62 | start_address = start_address_; | ||
| 63 | } | ||
| 64 | |||
| 65 | GenericEnvironment::~GenericEnvironment() = default; | ||
| 66 | |||
| 67 | u32 GenericEnvironment::TextureBoundBuffer() const { | ||
| 68 | return texture_bound; | ||
| 69 | } | ||
| 70 | |||
| 71 | u32 GenericEnvironment::LocalMemorySize() const { | ||
| 72 | return local_memory_size; | ||
| 73 | } | ||
| 74 | |||
| 75 | u32 GenericEnvironment::SharedMemorySize() const { | ||
| 76 | return shared_memory_size; | ||
| 77 | } | ||
| 78 | |||
| 79 | std::array<u32, 3> GenericEnvironment::WorkgroupSize() const { | ||
| 80 | return workgroup_size; | ||
| 81 | } | ||
| 82 | |||
| 83 | u64 GenericEnvironment::ReadInstruction(u32 address) { | ||
| 84 | read_lowest = std::min(read_lowest, address); | ||
| 85 | read_highest = std::max(read_highest, address); | ||
| 86 | |||
| 87 | if (address >= cached_lowest && address < cached_highest) { | ||
| 88 | return code[(address - cached_lowest) / INST_SIZE]; | ||
| 89 | } | ||
| 90 | has_unbound_instructions = true; | ||
| 91 | return gpu_memory->Read<u64>(program_base + address); | ||
| 92 | } | ||
| 93 | |||
| 94 | std::optional<u64> GenericEnvironment::Analyze() { | ||
| 95 | const std::optional<u64> size{TryFindSize()}; | ||
| 96 | if (!size) { | ||
| 97 | return std::nullopt; | ||
| 98 | } | ||
| 99 | cached_lowest = start_address; | ||
| 100 | cached_highest = start_address + static_cast<u32>(*size); | ||
| 101 | return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size); | ||
| 102 | } | ||
| 103 | |||
| 104 | void GenericEnvironment::SetCachedSize(size_t size_bytes) { | ||
| 105 | cached_lowest = start_address; | ||
| 106 | cached_highest = start_address + static_cast<u32>(size_bytes); | ||
| 107 | code.resize(CachedSize()); | ||
| 108 | gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); | ||
| 109 | } | ||
| 110 | |||
| 111 | size_t GenericEnvironment::CachedSize() const noexcept { | ||
| 112 | return cached_highest - cached_lowest + INST_SIZE; | ||
| 113 | } | ||
| 114 | |||
| 115 | size_t GenericEnvironment::ReadSize() const noexcept { | ||
| 116 | return read_highest - read_lowest + INST_SIZE; | ||
| 117 | } | ||
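Both sizes include the final 8-byte instruction, hence the + INST_SIZE term: the recorded bounds are the start addresses of the lowest and highest instructions touched. A worked example with illustrative values:

    // If the lowest instruction starts at 0x000 and the highest at 0x1f8, the
    // program spans 0x1f8 - 0x000 + 8 = 0x200 bytes, since the instruction
    // beginning at 0x1f8 extends through 0x1ff.
    static_assert(0x1f8 - 0x000 + 8 == 0x200);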
| 118 | |||
| 119 | bool GenericEnvironment::CanBeSerialized() const noexcept { | ||
| 120 | return !has_unbound_instructions; | ||
| 121 | } | ||
| 122 | |||
| 123 | u64 GenericEnvironment::CalculateHash() const { | ||
| 124 | const size_t size{ReadSize()}; | ||
| 125 | const auto data{std::make_unique<char[]>(size)}; | ||
| 126 | gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); | ||
| 127 | return Common::CityHash64(data.get(), size); | ||
| 128 | } | ||
| 129 | |||
| 130 | void GenericEnvironment::Serialize(std::ofstream& file) const { | ||
| 131 | const u64 code_size{static_cast<u64>(CachedSize())}; | ||
| 132 | const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||
| 133 | const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||
| 134 | |||
| 135 | file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||
| 136 | .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 137 | .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 138 | .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 139 | .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||
| 140 | .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | ||
| 141 | .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) | ||
| 142 | .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) | ||
| 143 | .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) | ||
| 144 | .write(reinterpret_cast<const char*>(code.data()), code_size); | ||
| 145 | for (const auto [key, type] : texture_types) { | ||
| 146 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 147 | .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||
| 148 | } | ||
| 149 | for (const auto [key, value] : cbuf_values) { | ||
| 150 | file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||
| 151 | .write(reinterpret_cast<const char*>(&value), sizeof(value)); | ||
| 152 | } | ||
| 153 | if (stage == Shader::Stage::Compute) { | ||
| 154 | file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 155 | .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 156 | } else { | ||
| 157 | file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | std::optional<u64> GenericEnvironment::TryFindSize() { | ||
| 162 | static constexpr size_t BLOCK_SIZE = 0x1000; | ||
| 163 | static constexpr size_t MAXIMUM_SIZE = 0x100000; | ||
| 164 | |||
| 165 | static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; | ||
| 166 | static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; | ||
| 167 | |||
| 168 | GPUVAddr guest_addr{program_base + start_address}; | ||
| 169 | size_t offset{0}; | ||
| 170 | size_t size{BLOCK_SIZE}; | ||
| 171 | while (size <= MAXIMUM_SIZE) { | ||
| 172 | code.resize(size / INST_SIZE); | ||
| 173 | u64* const data = code.data() + offset / INST_SIZE; | ||
| 174 | gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); | ||
| 175 | for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { | ||
| 176 | const u64 inst = data[index / INST_SIZE]; | ||
| 177 | if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { | ||
| 178 | return offset + index; | ||
| 179 | } | ||
| 180 | } | ||
| 181 | guest_addr += BLOCK_SIZE; | ||
| 182 | size += BLOCK_SIZE; | ||
| 183 | offset += BLOCK_SIZE; | ||
| 184 | } | ||
| 185 | return std::nullopt; | ||
| 186 | } | ||
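TryFindSize exploits the convention that Maxwell programs end in a branch-to-self instruction (two encodings, differing in the predicate bits), reading guest memory in 4 KiB blocks up to 1 MiB until a sentinel appears. A self-contained sketch of just the detection step, assuming the code is already in a host buffer:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <span>

    // Returns the byte offset of the first self-branch sentinel, if present.
    std::optional<std::size_t> FindSelfBranch(std::span<const std::uint64_t> insts) {
        constexpr std::uint64_t SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
        constexpr std::uint64_t SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
        for (std::size_t i = 0; i < insts.size(); ++i) {
            if (insts[i] == SELF_BRANCH_A || insts[i] == SELF_BRANCH_B) {
                return i * sizeof(std::uint64_t);
            }
        }
        return std::nullopt;
    }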
| 187 | |||
| 188 | Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, | ||
| 189 | bool via_header_index, u32 raw) { | ||
| 190 | const TextureHandle handle{raw, via_header_index}; | ||
| 191 | const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; | ||
| 192 | Tegra::Texture::TICEntry entry; | ||
| 193 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | ||
| 194 | const Shader::TextureType result{ConvertType(entry)}; | ||
| 195 | texture_types.emplace(raw, result); | ||
| 196 | return result; | ||
| 197 | } | ||
| 198 | |||
| 199 | GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 200 | Tegra::MemoryManager& gpu_memory_, | ||
| 201 | Maxwell::ShaderProgram program, GPUVAddr program_base_, | ||
| 202 | u32 start_address_) | ||
| 203 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { | ||
| 204 | gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); | ||
| 205 | switch (program) { | ||
| 206 | case Maxwell::ShaderProgram::VertexA: | ||
| 207 | stage = Shader::Stage::VertexA; | ||
| 208 | stage_index = 0; | ||
| 209 | break; | ||
| 210 | case Maxwell::ShaderProgram::VertexB: | ||
| 211 | stage = Shader::Stage::VertexB; | ||
| 212 | stage_index = 0; | ||
| 213 | break; | ||
| 214 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 215 | stage = Shader::Stage::TessellationControl; | ||
| 216 | stage_index = 1; | ||
| 217 | break; | ||
| 218 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 219 | stage = Shader::Stage::TessellationEval; | ||
| 220 | stage_index = 2; | ||
| 221 | break; | ||
| 222 | case Maxwell::ShaderProgram::Geometry: | ||
| 223 | stage = Shader::Stage::Geometry; | ||
| 224 | stage_index = 3; | ||
| 225 | break; | ||
| 226 | case Maxwell::ShaderProgram::Fragment: | ||
| 227 | stage = Shader::Stage::Fragment; | ||
| 228 | stage_index = 4; | ||
| 229 | break; | ||
| 230 | default: | ||
| 231 | UNREACHABLE_MSG("Invalid program={}", program); | ||
| 232 | break; | ||
| 233 | } | ||
| 234 | const u64 local_size{sph.LocalMemorySize()}; | ||
| 235 | ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||
| 236 | local_memory_size = static_cast<u32>(local_size); | ||
| 237 | texture_bound = maxwell3d->regs.tex_cb_index; | ||
| 238 | } | ||
| 239 | |||
| 240 | u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 241 | const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; | ||
| 242 | ASSERT(cbuf.enabled); | ||
| 243 | u32 value{}; | ||
| 244 | if (cbuf_offset < cbuf.size) { | ||
| 245 | value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); | ||
| 246 | } | ||
| 247 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 248 | return value; | ||
| 249 | } | ||
| 250 | |||
| 251 | Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | ||
| 252 | const auto& regs{maxwell3d->regs}; | ||
| 253 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 254 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); | ||
| 255 | } | ||
| 256 | |||
| 257 | ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 258 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 259 | u32 start_address_) | ||
| 260 | : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ | ||
| 261 | &kepler_compute_} { | ||
| 262 | const auto& qmd{kepler_compute->launch_description}; | ||
| 263 | stage = Shader::Stage::Compute; | ||
| 264 | local_memory_size = qmd.local_pos_alloc; | ||
| 265 | texture_bound = kepler_compute->regs.tex_cb_index; | ||
| 266 | shared_memory_size = qmd.shared_alloc; | ||
| 267 | workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; | ||
| 268 | } | ||
| 269 | |||
| 270 | u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 271 | const auto& qmd{kepler_compute->launch_description}; | ||
| 272 | ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); | ||
| 273 | const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; | ||
| 274 | u32 value{}; | ||
| 275 | if (cbuf_offset < cbuf.size) { | ||
| 276 | value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); | ||
| 277 | } | ||
| 278 | cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); | ||
| 279 | return value; | ||
| 280 | } | ||
| 281 | |||
| 282 | Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { | ||
| 283 | const auto& regs{kepler_compute->regs}; | ||
| 284 | const auto& qmd{kepler_compute->launch_description}; | ||
| 285 | return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); | ||
| 286 | } | ||
| 287 | |||
| 288 | void FileEnvironment::Deserialize(std::ifstream& file) { | ||
| 289 | u64 code_size{}; | ||
| 290 | u64 num_texture_types{}; | ||
| 291 | u64 num_cbuf_values{}; | ||
| 292 | file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||
| 293 | .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||
| 294 | .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||
| 295 | .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||
| 296 | .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||
| 297 | .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | ||
| 298 | .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) | ||
| 299 | .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) | ||
| 300 | .read(reinterpret_cast<char*>(&stage), sizeof(stage)); | ||
| 301 | code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); | ||
| 302 | file.read(reinterpret_cast<char*>(code.get()), code_size); | ||
| 303 | for (size_t i = 0; i < num_texture_types; ++i) { | ||
| 304 | u32 key; | ||
| 305 | Shader::TextureType type; | ||
| 306 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 307 | .read(reinterpret_cast<char*>(&type), sizeof(type)); | ||
| 308 | texture_types.emplace(key, type); | ||
| 309 | } | ||
| 310 | for (size_t i = 0; i < num_cbuf_values; ++i) { | ||
| 311 | u64 key; | ||
| 312 | u32 value; | ||
| 313 | file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||
| 314 | .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||
| 315 | cbuf_values.emplace(key, value); | ||
| 316 | } | ||
| 317 | if (stage == Shader::Stage::Compute) { | ||
| 318 | file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | ||
| 319 | .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||
| 320 | } else { | ||
| 321 | file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | u64 FileEnvironment::ReadInstruction(u32 address) { | ||
| 326 | if (address < read_lowest || address > read_highest) { | ||
| 327 | throw Shader::LogicError("Out of bounds address {}", address); | ||
| 328 | } | ||
| 329 | return code[(address - read_lowest) / sizeof(u64)]; | ||
| 330 | } | ||
| 331 | |||
| 332 | u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||
| 333 | const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; | ||
| 334 | if (it == cbuf_values.end()) { | ||
| 335 | throw Shader::LogicError("Uncached read texture type"); | ||
| 336 | } | ||
| 337 | return it->second; | ||
| 338 | } | ||
| 339 | |||
| 340 | Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { | ||
| 341 | const auto it{texture_types.find(handle)}; | ||
| 342 | if (it == texture_types.end()) { | ||
| 343 | throw Shader::LogicError("Uncached read texture type"); | ||
| 344 | } | ||
| 345 | return it->second; | ||
| 346 | } | ||
| 347 | |||
| 348 | u32 FileEnvironment::LocalMemorySize() const { | ||
| 349 | return local_memory_size; | ||
| 350 | } | ||
| 351 | |||
| 352 | u32 FileEnvironment::SharedMemorySize() const { | ||
| 353 | return shared_memory_size; | ||
| 354 | } | ||
| 355 | |||
| 356 | u32 FileEnvironment::TextureBoundBuffer() const { | ||
| 357 | return texture_bound; | ||
| 358 | } | ||
| 359 | |||
| 360 | std::array<u32, 3> FileEnvironment::WorkgroupSize() const { | ||
| 361 | return workgroup_size; | ||
| 362 | } | ||
| 363 | |||
| 364 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||
| 365 | const std::filesystem::path& filename) try { | ||
| 366 | std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); | ||
| 367 | if (!file.is_open()) { | ||
| 368 | LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", | ||
| 369 | Common::FS::PathToUTF8String(filename)); | ||
| 370 | return; | ||
| 371 | } | ||
| 372 | file.exceptions(std::ifstream::failbit); | ||
| 373 | if (file.tellp() == 0) { | ||
| 374 | // Write header | ||
| 375 | file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) | ||
| 376 | .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION)); | ||
| 377 | } | ||
| 378 | if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { | ||
| 379 | return; | ||
| 380 | } | ||
| 381 | const u32 num_envs{static_cast<u32>(envs.size())}; | ||
| 382 | file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); | ||
| 383 | for (const GenericEnvironment* const env : envs) { | ||
| 384 | env->Serialize(file); | ||
| 385 | } | ||
| 386 | file.write(key.data(), key.size_bytes()); | ||
| 387 | |||
| 388 | } catch (const std::ios_base::failure& e) { | ||
| 389 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 390 | if (!Common::FS::RemoveFile(filename)) { | ||
| 391 | LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", | ||
| 392 | Common::FS::PathToUTF8String(filename)); | ||
| 393 | } | ||
| 394 | } | ||
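For reference, the on-disk layout SerializePipeline produces; records are appended to one shared file over time:

    // char[8]  MAGIC_NUMBER   "yuzucach"
    // u32      CACHE_VERSION  currently 3
    // repeated, one record per pipeline:
    //   u32           num_envs
    //   environments  num_envs GenericEnvironment blobs (see Serialize above)
    //   key bytes     the backend's pipeline cache key, written last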
| 395 | |||
| 396 | void LoadPipelines( | ||
| 397 | std::stop_token stop_loading, const std::filesystem::path& filename, | ||
| 398 | Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, | ||
| 399 | Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try { | ||
| 400 | std::ifstream file(filename, std::ios::binary | std::ios::ate); | ||
| 401 | if (!file.is_open()) { | ||
| 402 | return; | ||
| 403 | } | ||
| 404 | file.exceptions(std::ifstream::failbit); | ||
| 405 | const auto end{file.tellg()}; | ||
| 406 | file.seekg(0, std::ios::beg); | ||
| 407 | |||
| 408 | std::array<char, 8> magic_number; | ||
| 409 | u32 cache_version; | ||
| 410 | file.read(magic_number.data(), magic_number.size()) | ||
| 411 | .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); | ||
| 412 | if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { | ||
| 413 | file.close(); | ||
| 414 | if (Common::FS::RemoveFile(filename)) { | ||
| 415 | if (magic_number != MAGIC_NUMBER) { | ||
| 416 | LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); | ||
| 417 | } | ||
| 418 | if (cache_version != CACHE_VERSION) { | ||
| 419 | LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); | ||
| 420 | } | ||
| 421 | } else { | ||
| 422 | LOG_ERROR(Common_Filesystem, | ||
| 423 | "Invalid pipeline cache file and failed to delete it in \"{}\"", | ||
| 424 | Common::FS::PathToUTF8String(filename)); | ||
| 425 | } | ||
| 426 | return; | ||
| 427 | } | ||
| 428 | while (file.tellg() != end) { | ||
| 429 | if (stop_loading.stop_requested()) { | ||
| 430 | return; | ||
| 431 | } | ||
| 432 | u32 num_envs{}; | ||
| 433 | file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); | ||
| 434 | std::vector<FileEnvironment> envs(num_envs); | ||
| 435 | for (FileEnvironment& env : envs) { | ||
| 436 | env.Deserialize(file); | ||
| 437 | } | ||
| 438 | if (envs.front().ShaderStage() == Shader::Stage::Compute) { | ||
| 439 | load_compute(file, std::move(envs.front())); | ||
| 440 | } else { | ||
| 441 | load_graphics(file, std::move(envs)); | ||
| 442 | } | ||
| 443 | } | ||
| 444 | |||
| 445 | } catch (const std::ios_base::failure& e) { | ||
| 446 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 447 | if (!Common::FS::RemoveFile(filename)) { | ||
| 448 | LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", | ||
| 449 | Common::FS::PathToUTF8String(filename)); | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | } // namespace VideoCommon | ||
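LoadPipelines stops reading each record right after its environments, leaving the stream positioned on the backend key, so the callbacks are expected to consume it themselves. A hypothetical caller; ComputeKey, GraphicsKey, and the Build* factories are placeholders for backend-specific code:

    std::stop_source stop_source;
    VideoCommon::LoadPipelines(
        stop_source.get_token(), cache_path,
        [&](std::ifstream& file, VideoCommon::FileEnvironment env) {
            ComputeKey key; // placeholder key type
            file.read(reinterpret_cast<char*>(&key), sizeof(key));
            BuildComputePipeline(key, std::move(env));
        },
        [&](std::ifstream& file, std::vector<VideoCommon::FileEnvironment> envs) {
            GraphicsKey key; // placeholder key type
            file.read(reinterpret_cast<char*>(&key), sizeof(key));
            BuildGraphicsPipeline(key, std::move(envs));
        });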
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <filesystem> | ||
| 10 | #include <iosfwd> | ||
| 11 | #include <limits> | ||
| 12 | #include <memory> | ||
| 13 | #include <optional> | ||
| 14 | #include <span> | ||
| 15 | #include <type_traits> | ||
| 16 | #include <unordered_map> | ||
| 17 | #include <vector> | ||
| 18 | |||
| 19 | #include "common/common_types.h" | ||
| 20 | #include "common/unique_function.h" | ||
| 21 | #include "shader_recompiler/environment.h" | ||
| 22 | #include "video_core/engines/kepler_compute.h" | ||
| 23 | #include "video_core/engines/maxwell_3d.h" | ||
| 24 | #include "video_core/textures/texture.h" | ||
| 25 | |||
| 26 | namespace Tegra { | ||
| 27 | class MemoryManager; | ||
| 28 | } | ||
| 29 | |||
| 30 | namespace VideoCommon { | ||
| 31 | |||
| 32 | struct TextureHandle { | ||
| 33 | explicit TextureHandle(u32 data, bool via_header_index) { | ||
| 34 | if (via_header_index) { | ||
| 35 | image = data; | ||
| 36 | sampler = data; | ||
| 37 | } else { | ||
| 38 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 39 | image = handle.tic_id; | ||
| 40 | sampler = handle.tsc_id.Value(); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | u32 image; | ||
| 45 | u32 sampler; | ||
| 46 | }; | ||
| 47 | |||
| 48 | class GenericEnvironment : public Shader::Environment { | ||
| 49 | public: | ||
| 50 | explicit GenericEnvironment() = default; | ||
| 51 | explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 52 | u32 start_address_); | ||
| 53 | |||
| 54 | ~GenericEnvironment() override; | ||
| 55 | |||
| 56 | [[nodiscard]] u32 TextureBoundBuffer() const final; | ||
| 57 | |||
| 58 | [[nodiscard]] u32 LocalMemorySize() const final; | ||
| 59 | |||
| 60 | [[nodiscard]] u32 SharedMemorySize() const final; | ||
| 61 | |||
| 62 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final; | ||
| 63 | |||
| 64 | [[nodiscard]] u64 ReadInstruction(u32 address) final; | ||
| 65 | |||
| 66 | [[nodiscard]] std::optional<u64> Analyze(); | ||
| 67 | |||
| 68 | void SetCachedSize(size_t size_bytes); | ||
| 69 | |||
| 70 | [[nodiscard]] size_t CachedSize() const noexcept; | ||
| 71 | |||
| 72 | [[nodiscard]] size_t ReadSize() const noexcept; | ||
| 73 | |||
| 74 | [[nodiscard]] bool CanBeSerialized() const noexcept; | ||
| 75 | |||
| 76 | [[nodiscard]] u64 CalculateHash() const; | ||
| 77 | |||
| 78 | void Serialize(std::ofstream& file) const; | ||
| 79 | |||
| 80 | protected: | ||
| 81 | std::optional<u64> TryFindSize(); | ||
| 82 | |||
| 83 | Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, | ||
| 84 | u32 raw); | ||
| 85 | |||
| 86 | Tegra::MemoryManager* gpu_memory{}; | ||
| 87 | GPUVAddr program_base{}; | ||
| 88 | |||
| 89 | std::vector<u64> code; | ||
| 90 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 91 | std::unordered_map<u64, u32> cbuf_values; | ||
| 92 | |||
| 93 | u32 local_memory_size{}; | ||
| 94 | u32 texture_bound{}; | ||
| 95 | u32 shared_memory_size{}; | ||
| 96 | std::array<u32, 3> workgroup_size{}; | ||
| 97 | |||
| 98 | u32 read_lowest = std::numeric_limits<u32>::max(); | ||
| 99 | u32 read_highest = 0; | ||
| 100 | |||
| 101 | u32 cached_lowest = std::numeric_limits<u32>::max(); | ||
| 102 | u32 cached_highest = 0; | ||
| 103 | |||
| 104 | bool has_unbound_instructions = false; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class GraphicsEnvironment final : public GenericEnvironment { | ||
| 108 | public: | ||
| 109 | explicit GraphicsEnvironment() = default; | ||
| 110 | explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 111 | Tegra::MemoryManager& gpu_memory_, | ||
| 112 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, | ||
| 113 | GPUVAddr program_base_, u32 start_address_); | ||
| 114 | |||
| 115 | ~GraphicsEnvironment() override = default; | ||
| 116 | |||
| 117 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 118 | |||
| 119 | Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 120 | |||
| 121 | private: | ||
| 122 | Tegra::Engines::Maxwell3D* maxwell3d{}; | ||
| 123 | size_t stage_index{}; | ||
| 124 | }; | ||
| 125 | |||
| 126 | class ComputeEnvironment final : public GenericEnvironment { | ||
| 127 | public: | ||
| 128 | explicit ComputeEnvironment() = default; | ||
| 129 | explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 130 | Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, | ||
| 131 | u32 start_address_); | ||
| 132 | |||
| 133 | ~ComputeEnvironment() override = default; | ||
| 134 | |||
| 135 | u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 136 | |||
| 137 | Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 138 | |||
| 139 | private: | ||
| 140 | Tegra::Engines::KeplerCompute* kepler_compute{}; | ||
| 141 | }; | ||
| 142 | |||
| 143 | class FileEnvironment final : public Shader::Environment { | ||
| 144 | public: | ||
| 145 | FileEnvironment() = default; | ||
| 146 | ~FileEnvironment() override = default; | ||
| 147 | |||
| 148 | FileEnvironment& operator=(FileEnvironment&&) noexcept = default; | ||
| 149 | FileEnvironment(FileEnvironment&&) noexcept = default; | ||
| 150 | |||
| 151 | FileEnvironment& operator=(const FileEnvironment&) = delete; | ||
| 152 | FileEnvironment(const FileEnvironment&) = delete; | ||
| 153 | |||
| 154 | void Deserialize(std::ifstream& file); | ||
| 155 | |||
| 156 | [[nodiscard]] u64 ReadInstruction(u32 address) override; | ||
| 157 | |||
| 158 | [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; | ||
| 159 | |||
| 160 | [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; | ||
| 161 | |||
| 162 | [[nodiscard]] u32 LocalMemorySize() const override; | ||
| 163 | |||
| 164 | [[nodiscard]] u32 SharedMemorySize() const override; | ||
| 165 | |||
| 166 | [[nodiscard]] u32 TextureBoundBuffer() const override; | ||
| 167 | |||
| 168 | [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; | ||
| 169 | |||
| 170 | private: | ||
| 171 | std::unique_ptr<u64[]> code; | ||
| 172 | std::unordered_map<u32, Shader::TextureType> texture_types; | ||
| 173 | std::unordered_map<u64, u32> cbuf_values; | ||
| 174 | std::array<u32, 3> workgroup_size{}; | ||
| 175 | u32 local_memory_size{}; | ||
| 176 | u32 shared_memory_size{}; | ||
| 177 | u32 texture_bound{}; | ||
| 178 | u32 read_lowest{}; | ||
| 179 | u32 read_highest{}; | ||
| 180 | }; | ||
| 181 | |||
| 182 | void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||
| 183 | const std::filesystem::path& filename); | ||
| 184 | |||
| 185 | template <typename Key, typename Envs> | ||
| 186 | void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { | ||
| 187 | static_assert(std::is_trivially_copyable_v<Key>); | ||
| 188 | static_assert(std::has_unique_object_representations_v<Key>); | ||
| 189 | SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)), | ||
| 190 | std::span(envs.data(), envs.size()), filename); | ||
| 191 | } | ||
| 192 | |||
| 193 | void LoadPipelines( | ||
| 194 | std::stop_token stop_loading, const std::filesystem::path& filename, | ||
| 195 | Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, | ||
| 196 | Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics); | ||
| 197 | |||
| 198 | } // namespace VideoCommon | ||