Diffstat (limited to 'src')
 src/video_core/CMakeLists.txt                         |   3
 src/video_core/renderer_opengl/gl_rasterizer.h        |   2
 src/video_core/renderer_opengl/gl_shader_cache.cpp    |  21
 src/video_core/renderer_opengl/gl_shader_cache.h      |  58
 src/video_core/renderer_vulkan/vk_graphics_pipeline.h |   2
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp  | 719
 src/video_core/renderer_vulkan/vk_pipeline_cache.h    |  30
 src/video_core/renderer_vulkan/vk_rasterizer.cpp      |   2
 src/video_core/shader_cache.cpp                       | 233
 src/video_core/shader_cache.h                         | 198
 src/video_core/shader_environment.cpp                 | 453
 src/video_core/shader_environment.h                   | 198
 12 files changed, 1095 insertions(+), 824 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3166a69dc..6e0e4b8f5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -145,7 +145,10 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
+    shader_cache.cpp
     shader_cache.h
+    shader_environment.cpp
+    shader_environment.h
     shader_notify.cpp
     shader_notify.h
     surface.cpp
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f58f8791..2fdcbe4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -217,7 +217,7 @@ private:
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
     BufferCache buffer_cache;
-    ShaderCacheOpenGL shader_cache;
+    ShaderCache shader_cache;
     QueryCache query_cache;
     AccelerateDMA accelerate_dma;
     FenceManagerOpenGL fence_manager;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4dd166156..c3e490b40 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,18 +29,13 @@
 
 namespace OpenGL {
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                     Tegra::Engines::Maxwell3D& maxwell3d_,
-                                     Tegra::Engines::KeplerCompute& kepler_compute_,
-                                     Tegra::MemoryManager& gpu_memory_, const Device& device_)
-    : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
-      maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_)
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
+
+ShaderCache::~ShaderCache() = default;
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index ad3d15a76..96520e17c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -36,27 +36,59 @@ class RasterizerOpenGL;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-class Shader {
+struct GraphicsProgramKey {
+    struct TransformFeedbackState {
+        struct Layout {
+            u32 stream;
+            u32 varying_count;
+            u32 stride;
+        };
+        std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
+        std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
+    };
+
+    std::array<u64, 6> unique_hashes;
+    std::array<u8, Maxwell::NumRenderTargets> color_formats;
+    union {
+        u32 raw;
+        BitField<0, 1, u32> xfb_enabled;
+        BitField<1, 1, u32> early_z;
+        BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+        BitField<6, 2, u32> tessellation_primitive;
+        BitField<8, 2, u32> tessellation_spacing;
+        BitField<10, 1, u32> tessellation_clockwise;
+    };
+    u32 padding;
+    TransformFeedbackState xfb_state;
+
+    [[nodiscard]] size_t Size() const noexcept {
+        if (xfb_enabled != 0) {
+            return sizeof(GraphicsProgramKey);
+        } else {
+            return offsetof(GraphicsProgramKey, padding);
+        }
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
+
+class GraphicsProgram {
 public:
-    explicit Shader();
-    ~Shader();
+private:
 };
 
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
+class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
-                               Tegra::Engines::Maxwell3D& maxwell3d_,
-                               Tegra::Engines::KeplerCompute& kepler_compute_,
-                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
-    ~ShaderCacheOpenGL() override;
+    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_);
+    ~ShaderCache();
 
 private:
     Core::Frontend::EmuWindow& emu_window;
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
     const Device& device;
 };
 
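Note on the key layout above: GraphicsProgramKey is asserted to be trivially copyable with unique object representations precisely so the cache can hash and compare it as raw bytes, and Size() trims the key to the fields in use when transform feedback is disabled, so unused xfb_state bytes never affect lookups. A minimal sketch of that pattern (illustrative only; the standard-library hash here stands in for the Common::CityHash64 the real keys use):

    #include <cstring>
    #include <functional>
    #include <string_view>

    // Hash only the meaningful prefix of a trivially copyable key.
    template <typename Key>
    size_t HashKey(const Key& key) noexcept {
        const char* const data = reinterpret_cast<const char*>(&key);
        return std::hash<std::string_view>{}(std::string_view{data, key.Size()});
    }

    // Two keys are equal when their used prefixes match byte for byte.
    template <typename Key>
    bool KeyEquals(const Key& lhs, const Key& rhs) noexcept {
        return lhs.Size() == rhs.Size() && std::memcmp(&lhs, &rhs, lhs.Size()) == 0;
    }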
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85e21f611..e362d13c5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -23,7 +23,7 @@
 namespace Vulkan {
 
 struct GraphicsPipelineCacheKey {
-    std::array<u128, 6> unique_hashes;
+    std::array<u64, 6> unique_hashes;
     FixedPipelineState state;
 
     size_t Hash() const noexcept;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9d9729022..0822862fe 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -11,7 +11,8 @@
 
 #include "common/bit_cast.h"
 #include "common/cityhash.h"
-#include "common/file_util.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/microprofile.h"
 #include "common/thread_worker.h"
 #include "core/core.h"
@@ -36,6 +37,7 @@
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -43,449 +45,19 @@
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
-template <typename Container>
-auto MakeSpan(Container& container) {
-    return std::span(container.data(), container.size());
-}
-
-static u64 MakeCbufKey(u32 index, u32 offset) {
-    return (static_cast<u64>(index) << 32) | offset;
-}
-
-class GenericEnvironment : public Shader::Environment {
-public:
-    explicit GenericEnvironment() = default;
-    explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : gpu_memory{&gpu_memory_}, program_base{program_base_} {
-        start_address = start_address_;
-    }
-
-    ~GenericEnvironment() override = default;
-
-    u32 TextureBoundBuffer() const final {
-        return texture_bound;
-    }
-
-    u32 LocalMemorySize() const final {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const final {
-        return shared_memory_size;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const final {
-        return workgroup_size;
-    }
-
-    u64 ReadInstruction(u32 address) final {
-        read_lowest = std::min(read_lowest, address);
-        read_highest = std::max(read_highest, address);
-
-        if (address >= cached_lowest && address < cached_highest) {
-            return code[(address - cached_lowest) / INST_SIZE];
-        }
-        has_unbound_instructions = true;
-        return gpu_memory->Read<u64>(program_base + address);
-    }
-
-    std::optional<u128> Analyze() {
-        const std::optional<u64> size{TryFindSize()};
-        if (!size) {
-            return std::nullopt;
-        }
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(*size);
-        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
-    }
-
-    void SetCachedSize(size_t size_bytes) {
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(size_bytes);
-        code.resize(CachedSize());
-        gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
-    }
-
-    [[nodiscard]] size_t CachedSize() const noexcept {
-        return cached_highest - cached_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] size_t ReadSize() const noexcept {
-        return read_highest - read_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] bool CanBeSerialized() const noexcept {
-        return !has_unbound_instructions;
-    }
-
-    [[nodiscard]] u128 CalculateHash() const {
-        const size_t size{ReadSize()};
-        const auto data{std::make_unique<char[]>(size)};
-        gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
-        return Common::CityHash128(data.get(), size);
-    }
-
-    void Serialize(std::ofstream& file) const {
-        const u64 code_size{static_cast<u64>(CachedSize())};
-        const u64 num_texture_types{static_cast<u64>(texture_types.size())};
-        const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
-
-        file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
-            .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
-            .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
-            .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
-            .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
-            .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
-            .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
-            .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
-            .write(reinterpret_cast<const char*>(code.data()), code_size);
-        for (const auto [key, type] : texture_types) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        for (const auto [key, type] : cbuf_values) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
-                .write(reinterpret_cast<const char*>(&shared_memory_size),
-                       sizeof(shared_memory_size));
-        } else {
-            file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
-        }
-    }
-
-protected:
-    static constexpr size_t INST_SIZE = sizeof(u64);
-
-    std::optional<u64> TryFindSize() {
-        constexpr size_t BLOCK_SIZE = 0x1000;
-        constexpr size_t MAXIMUM_SIZE = 0x100000;
-
-        constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
-        constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
-
-        GPUVAddr guest_addr{program_base + start_address};
-        size_t offset{0};
-        size_t size{BLOCK_SIZE};
-        while (size <= MAXIMUM_SIZE) {
-            code.resize(size / INST_SIZE);
-            u64* const data = code.data() + offset / INST_SIZE;
-            gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
-            for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
-                const u64 inst = data[index / INST_SIZE];
-                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
-                    return offset + index;
-                }
-            }
-            guest_addr += BLOCK_SIZE;
-            size += BLOCK_SIZE;
-            offset += BLOCK_SIZE;
-        }
-        return std::nullopt;
-    }
-
-    Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
-                                            u32 raw) {
-        const TextureHandle handle{raw, via_header_index};
-        const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
-        Tegra::Texture::TICEntry entry;
-        gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
-
-        const Shader::TextureType result{[&] {
-            switch (entry.texture_type) {
-            case Tegra::Texture::TextureType::Texture1D:
-                return Shader::TextureType::Color1D;
-            case Tegra::Texture::TextureType::Texture2D:
-            case Tegra::Texture::TextureType::Texture2DNoMipmap:
-                return Shader::TextureType::Color2D;
-            case Tegra::Texture::TextureType::Texture3D:
-                return Shader::TextureType::Color3D;
-            case Tegra::Texture::TextureType::TextureCubemap:
-                return Shader::TextureType::ColorCube;
-            case Tegra::Texture::TextureType::Texture1DArray:
-                return Shader::TextureType::ColorArray1D;
-            case Tegra::Texture::TextureType::Texture2DArray:
-                return Shader::TextureType::ColorArray2D;
-            case Tegra::Texture::TextureType::Texture1DBuffer:
-                return Shader::TextureType::Buffer;
-            case Tegra::Texture::TextureType::TextureCubeArray:
-                return Shader::TextureType::ColorArrayCube;
-            default:
-                throw Shader::NotImplementedException("Unknown texture type");
-            }
-        }()};
-        texture_types.emplace(raw, result);
-        return result;
-    }
-
-    Tegra::MemoryManager* gpu_memory{};
-    GPUVAddr program_base{};
-
-    std::vector<u64> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-
-    u32 local_memory_size{};
-    u32 texture_bound{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
-
-    u32 read_lowest = std::numeric_limits<u32>::max();
-    u32 read_highest = 0;
-
-    u32 cached_lowest = std::numeric_limits<u32>::max();
-    u32 cached_highest = 0;
-
-    bool has_unbound_instructions = false;
-};
-
 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
 using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
 
-// TODO: Move this to a separate file
-constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
-constexpr u32 CACHE_VERSION{2};
-
-class GraphicsEnvironment final : public GenericEnvironment {
-public:
-    explicit GraphicsEnvironment() = default;
-    explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
-                                 Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
-                                 GPUVAddr program_base_, u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
-        gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
-        switch (program) {
-        case Maxwell::ShaderProgram::VertexA:
-            stage = Shader::Stage::VertexA;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::VertexB:
-            stage = Shader::Stage::VertexB;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::TesselationControl:
-            stage = Shader::Stage::TessellationControl;
-            stage_index = 1;
-            break;
-        case Maxwell::ShaderProgram::TesselationEval:
-            stage = Shader::Stage::TessellationEval;
-            stage_index = 2;
-            break;
-        case Maxwell::ShaderProgram::Geometry:
-            stage = Shader::Stage::Geometry;
-            stage_index = 3;
-            break;
-        case Maxwell::ShaderProgram::Fragment:
-            stage = Shader::Stage::Fragment;
-            stage_index = 4;
-            break;
-        default:
-            UNREACHABLE_MSG("Invalid program={}", program);
-            break;
-        }
-        const u64 local_size{sph.LocalMemorySize()};
-        ASSERT(local_size <= std::numeric_limits<u32>::max());
-        local_memory_size = static_cast<u32>(local_size);
-        texture_bound = maxwell3d->regs.tex_cb_index;
-    }
-
-    ~GraphicsEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
-        ASSERT(cbuf.enabled);
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{maxwell3d->regs};
-        const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
-    }
-
-private:
-    Tegra::Engines::Maxwell3D* maxwell3d{};
-    size_t stage_index{};
-};
-
-class ComputeEnvironment final : public GenericEnvironment {
-public:
-    explicit ComputeEnvironment() = default;
-    explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
-                                Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
-                                                                              &kepler_compute_} {
-        const auto& qmd{kepler_compute->launch_description};
-        stage = Shader::Stage::Compute;
-        local_memory_size = qmd.local_pos_alloc;
-        texture_bound = kepler_compute->regs.tex_cb_index;
-        shared_memory_size = qmd.shared_alloc;
-        workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
-    }
-
-    ~ComputeEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& qmd{kepler_compute->launch_description};
-        ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
-        const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{kepler_compute->regs};
-        const auto& qmd{kepler_compute->launch_description};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
-    }
-
-private:
-    Tegra::Engines::KeplerCompute* kepler_compute{};
-};
-
-void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
-                       std::ofstream& file) {
-    if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
-        return;
-    }
-    const u32 num_envs{static_cast<u32>(envs.size())};
-    file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
-    for (const GenericEnvironment* const env : envs) {
-        env->Serialize(file);
-    }
-    file.write(key.data(), key.size_bytes());
-}
-
-template <typename Key, typename Envs>
-void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) {
-    try {
-        std::ofstream file;
-        file.exceptions(std::ifstream::failbit);
-        Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
-        if (!file.is_open()) {
-            LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
-            return;
-        }
-        if (file.tellp() == 0) {
-            file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
-                .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
-        }
-        const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
-        SerializePipeline(key_span, MakeSpan(envs), file);
-
-    } catch (const std::ios_base::failure& e) {
-        LOG_ERROR(Common_Filesystem, "{}", e.what());
-        if (!Common::FS::Delete(filename)) {
-            LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename);
-        }
-    }
-}
+template <typename Container>
+auto MakeSpan(Container& container) {
+    return std::span(container.data(), container.size());
+}
 
-class FileEnvironment final : public Shader::Environment {
-public:
-    void Deserialize(std::ifstream& file) {
-        u64 code_size{};
-        u64 num_texture_types{};
-        u64 num_cbuf_values{};
-        file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
-            .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
-            .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
-            .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
-            .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
-            .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
-            .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
-            .read(reinterpret_cast<char*>(&stage), sizeof(stage));
-        code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
-        file.read(reinterpret_cast<char*>(code.get()), code_size);
-        for (size_t i = 0; i < num_texture_types; ++i) {
-            u32 key;
-            Shader::TextureType type;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&type), sizeof(type));
-            texture_types.emplace(key, type);
-        }
-        for (size_t i = 0; i < num_cbuf_values; ++i) {
-            u64 key;
-            u32 value;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&value), sizeof(value));
-            cbuf_values.emplace(key, value);
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
-                .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
-        } else {
-            file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
-        }
-    }
-
-    u64 ReadInstruction(u32 address) override {
-        if (address < read_lowest || address > read_highest) {
-            throw Shader::LogicError("Out of bounds address {}", address);
-        }
-        return code[(address - read_lowest) / sizeof(u64)];
-    }
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
-        if (it == cbuf_values.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto it{texture_types.find(handle)};
-        if (it == texture_types.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    u32 LocalMemorySize() const override {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const override {
-        return shared_memory_size;
-    }
-
-    u32 TextureBoundBuffer() const override {
-        return texture_bound;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const override {
-        return workgroup_size;
-    }
-
-private:
-    std::unique_ptr<u64[]> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-    std::array<u32, 3> workgroup_size{};
-    u32 local_memory_size{};
-    u32 shared_memory_size{};
-    u32 texture_bound{};
-    u32 read_lowest{};
-    u32 read_highest{};
-};
-
 Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
     switch (comparison) {
     case Maxwell::ComparisonOp::Never:
@@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
 }
 } // Anonymous namespace
 
-void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
-                                      const VideoCore::DiskResourceLoadCallback& callback) {
-    if (title_id == 0) {
-        return;
-    }
-    std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)};
-    std::string base_dir{shader_dir + "/vulkan"};
-    std::string transferable_dir{base_dir + "/transferable"};
-    std::string precompiled_dir{base_dir + "/precompiled"};
-    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
-        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
-        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
-        return;
-    }
-    pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);
-
-    struct {
-        std::mutex mutex;
-        size_t total{0};
-        size_t built{0};
-        bool has_loaded{false};
-    } state;
-
-    std::ifstream file;
-    Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate);
-    if (!file.is_open()) {
-        return;
-    }
-    file.exceptions(std::ifstream::failbit);
-    const auto end{file.tellg()};
-    file.seekg(0, std::ios::beg);
-
-    std::array<char, 8> magic_number;
-    u32 cache_version;
-    file.read(magic_number.data(), magic_number.size())
-        .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
-    if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
-        file.close();
-        if (Common::FS::Delete(pipeline_cache_filename)) {
-            if (magic_number != MAGIC_NUMBER) {
-                LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
-            }
-            if (cache_version != CACHE_VERSION) {
-                LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
-            }
-        } else {
-            LOG_ERROR(Render_Vulkan,
-                      "Invalid pipeline cache file and failed to delete it in \"{}\"",
-                      pipeline_cache_filename);
-        }
-        return;
-    }
-    while (file.tellg() != end) {
-        if (stop_loading) {
-            return;
-        }
-        u32 num_envs{};
-        file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
-        std::vector<FileEnvironment> envs(num_envs);
-        for (FileEnvironment& env : envs) {
-            env.Deserialize(file);
-        }
-        if (envs.front().ShaderStage() == Shader::Stage::Compute) {
-            ComputePipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
-
-                std::lock_guard lock{state.mutex};
-                compute_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        } else {
-            GraphicsPipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
-                for (auto& env : envs) {
-                    env_ptrs.push_back(&env);
-                }
-                auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
-
-                std::lock_guard lock{state.mutex};
-                graphics_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        }
-        ++state.total;
-    }
-    {
-        std::lock_guard lock{state.mutex};
-        callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
-        state.has_loaded = true;
-    }
-    workers.WaitForRequests();
-}
-
 size_t ComputePipelineCacheKey::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
     return static_cast<size_t>(hash);
@@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
     return std::memcmp(&rhs, this, Size()) == 0;
 }
 
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
-                             Tegra::Engines::Maxwell3D& maxwell3d_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_,
                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                              TextureCache& texture_cache_)
-    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
-      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
       update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
       buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
       workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
@@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default;
 GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    if (!RefreshStages()) {
+    if (!RefreshStages(graphics_key.unique_hashes)) {
         current_pipeline = nullptr;
         return nullptr;
     }
@@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
 ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-    if (!cpu_shader_addr) {
-        return nullptr;
-    }
-    const ShaderInfo* shader{TryGet(*cpu_shader_addr)};
+    const ShaderInfo* const shader{ComputeShader()};
     if (!shader) {
-        ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
-        shader = MakeShaderInfo(env, *cpu_shader_addr);
+        return nullptr;
     }
+    const auto& qmd{kepler_compute.launch_description};
     const ComputePipelineCacheKey key{
-        .unique_hash{shader->unique_hash},
-        .shared_memory_size{qmd.shared_alloc},
+        .unique_hash = shader->unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
         .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
     };
     const auto [pair, is_new]{compute_cache.try_emplace(key)};
@@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
     return pipeline.get();
 }
 
-bool PipelineCache::RefreshStages() {
-    auto& dirty{maxwell3d.dirty.flags};
-    if (!dirty[VideoCommon::Dirty::Shaders]) {
-        return last_valid_shaders;
-    }
-    dirty[VideoCommon::Dirty::Shaders] = false;
-
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
-            graphics_key.unique_hashes[index] = u128{};
-            continue;
-        }
-        const auto& shader_config{maxwell3d.regs.shader_config[index]};
-        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-        if (!cpu_shader_addr) {
-            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-            last_valid_shaders = false;
-            return false;
-        }
-        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
-        if (!shader_info) {
-            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
-            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
-        }
-        shader_infos[index] = shader_info;
-        graphics_key.unique_hashes[index] = shader_info->unique_hash;
-    }
-    last_valid_shaders = true;
-    return true;
-}
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    if (title_id == 0) {
+        return;
+    }
+    auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+    auto base_dir{shader_dir / "vulkan"};
+    auto transferable_dir{base_dir / "transferable"};
+    auto precompiled_dir{base_dir / "precompiled"};
+    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
+        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
+        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+        return;
+    }
+    pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id);
 
-const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
-    auto info = std::make_unique<ShaderInfo>();
-    if (const std::optional<u128> cached_hash{env.Analyze()}) {
-        info->unique_hash = *cached_hash;
-        info->size_bytes = env.CachedSize();
-    } else {
-        // Slow path, not really hit on commercial games
-        // Build a control flow graph to get the real shader size
-        main_pools.flow_block.ReleaseContents();
-        Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()};
-        info->unique_hash = env.CalculateHash();
-        info->size_bytes = env.ReadSize();
-    }
-    const size_t size_bytes{info->size_bytes};
-    const ShaderInfo* const result{info.get()};
-    Register(std::move(info), cpu_addr, size_bytes);
-    return result;
+    struct {
+        std::mutex mutex;
+        size_t total{0};
+        size_t built{0};
+        bool has_loaded{false};
+    } state;
+
+    const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+        ComputePipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+            ShaderPools pools;
+            auto pipeline{CreateComputePipeline(pools, key, env, false)};
+
+            std::lock_guard lock{state.mutex};
+            compute_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+        GraphicsPipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+            ShaderPools pools;
+            boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+            for (auto& env : envs) {
+                env_ptrs.push_back(&env);
+            }
+            auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
+
+            std::lock_guard lock{state.mutex};
+            graphics_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);
+
+    std::unique_lock lock{state.mutex};
+    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+    state.has_loaded = true;
+    lock.unlock();
+
+    workers.WaitForRequests();
 }
 
 std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
@@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         Shader::Environment& env{*envs[env_index]};
@@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
 
     u32 binding{0};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         UNIMPLEMENTED_IF(index == 0);
@@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
-            const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0],
-                                               key.unique_hashes[index][1])};
+            const std::string name{fmt::format("{:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
         }
     }
@@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
 
     const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (graphics_key.unique_hashes[index] == u128{}) {
+        if (graphics_key.unique_hashes[index] == 0) {
             continue;
         }
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         const u32 start_address{maxwell3d.regs.shader_config[index].offset};
         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
         env.SetCachedSize(shader_infos[index]->size_bytes);
-
         envs.push_back(&env);
     }
     auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
@@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
             env_ptrs;
         for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-            if (key.unique_hashes[index] != u128{}) {
+            if (key.unique_hashes[index] != 0) {
                 env_ptrs.push_back(&envs[index]);
             }
         }
-        SerializePipeline(key, env_ptrs, pipeline_cache_filename);
+        VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename);
     });
     return pipeline;
 }
@@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
     if (!pipeline_cache_filename.empty()) {
         serialization_thread.QueueWork([this, key, env = std::move(env)] {
-            SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
-                              pipeline_cache_filename);
+            VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+                                           pipeline_cache_filename);
         });
     }
     return pipeline;
@@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {
-        const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
+        const auto name{fmt::format("{:016x}", key.unique_hash)};
         spv_module.SetObjectNameEXT(name.c_str());
     }
     Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
@@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
     Shader::Profile profile{base_profile};
 
     const Shader::Stage stage{program.stage};
-    const bool has_geometry{key.unique_hashes[4] != u128{}};
+    const bool has_geometry{key.unique_hashes[4] != 0};
     const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
     const float point_size{Common::BitCast<float>(key.state.point_size)};
     switch (stage) {
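With shader hashes narrowed from u128 (CityHash128) to u64 (CityHash64) by this commit, the debug names attached to shader modules shrink from two {:016x} fields to one, as the hunks above show. The constant-buffer snooping that moved into VideoCommon packs a (bank index, byte offset) pair into a single u64 map key; the packing below is quoted from the removed code, while the unpack helper is an illustrative inverse, not part of the commit:

    #include <utility>
    #include "common/common_types.h" // yuzu's u32/u64 aliases

    // Pack a constant buffer index and byte offset into one map key.
    static u64 MakeCbufKey(u32 index, u32 offset) {
        return (static_cast<u64>(index) << 32) | offset;
    }

    // Illustrative inverse of MakeCbufKey: high word is the index,
    // low word the offset.
    static std::pair<u32, u32> UnpackCbufKey(u64 key) {
        return {static_cast<u32>(key >> 32), static_cast<u32>(key)};
    }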
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eec17d3fd..4e48b4956 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <filesystem>
 #include <iosfwd>
 #include <memory>
 #include <type_traits>
@@ -42,7 +43,7 @@ namespace Vulkan {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    u128 unique_hash;
+    u64 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
 
@@ -76,16 +77,12 @@ namespace Vulkan {
 class ComputePipeline;
 class Device;
 class DescriptorPool;
-class GenericEnvironment;
 class RasterizerVulkan;
 class RenderPassCache;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-struct ShaderInfo {
-    u128 unique_hash{};
-    size_t size_bytes{};
-};
+using VideoCommon::ShaderInfo;
 
 struct ShaderPools {
     void ReleaseContents() {
@@ -99,17 +96,16 @@ struct ShaderPools {
     Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
+class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
-                           Tegra::Engines::Maxwell3D& maxwell3d,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
                            Tegra::Engines::KeplerCompute& kepler_compute,
                            Tegra::MemoryManager& gpu_memory, const Device& device,
                            VKScheduler& scheduler, DescriptorPool& descriptor_pool,
                            VKUpdateDescriptorQueue& update_descriptor_queue,
                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                            TextureCache& texture_cache);
-    ~PipelineCache() override;
+    ~PipelineCache();
 
     [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
 
@@ -119,10 +115,6 @@ public:
                            const VideoCore::DiskResourceLoadCallback& callback);
 
 private:
-    bool RefreshStages();
-
-    const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
-
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
 
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
@@ -140,11 +132,6 @@ private:
     Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
                                 const Shader::IR::Program& program);
 
-    Tegra::GPU& gpu;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-
     const Device& device;
     VKScheduler& scheduler;
     DescriptorPool& descriptor_pool;
@@ -156,16 +143,13 @@ private:
     GraphicsPipelineCacheKey graphics_key{};
     GraphicsPipeline* current_pipeline{};
 
-    std::array<const ShaderInfo*, 6> shader_infos{};
-    bool last_valid_shaders{};
-
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
 
     ShaderPools main_pools;
 
     Shader::Profile base_profile;
-    std::string pipeline_cache_filename;
+    std::filesystem::path pipeline_cache_filename;
 
     Common::ThreadWorker workers;
     Common::ThreadWorker serialization_thread;
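The two unordered_map members above need std::hash support for their key types; yuzu provides that glue by forwarding to each key's own Hash() method. The specializations sit outside this excerpt, so the following is a sketch of how that glue typically looks rather than a verbatim quote from the header:

    namespace std {
    // Forward std::hash to the key's CityHash-based Hash() member.
    template <>
    struct hash<Vulkan::ComputePipelineCacheKey> {
        size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
            return k.Hash();
        }
    };
    template <>
    struct hash<Vulkan::GraphicsPipelineCacheKey> {
        size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
            return k.Hash();
        }
    };
    } // namespace std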
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7df169c85..fa6daeb3a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
     buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                          update_descriptor_queue, descriptor_pool),
     buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
-    pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+    pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
                    descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
                    texture_cache),
     query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache },
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..b8b8eace5
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,233 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <vector>
8
9#include "common/assert.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/object_pool.h"
12#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h"
16#include "video_core/shader_cache.h"
17#include "video_core/shader_environment.h"
18
19namespace VideoCommon {
20
21void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
22 std::scoped_lock lock{invalidation_mutex};
23 InvalidatePagesInRegion(addr, size);
24 RemovePendingShaders();
25}
26
27void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
28 std::lock_guard lock{invalidation_mutex};
29 InvalidatePagesInRegion(addr, size);
30}
31
32void ShaderCache::SyncGuestHost() {
33 std::scoped_lock lock{invalidation_mutex};
34 RemovePendingShaders();
35}
36
37ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
38 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
39 Tegra::Engines::KeplerCompute& kepler_compute_)
40 : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
41 rasterizer{rasterizer_} {}
42
43bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
44 auto& dirty{maxwell3d.dirty.flags};
45 if (!dirty[VideoCommon::Dirty::Shaders]) {
46 return last_shaders_valid;
47 }
48 dirty[VideoCommon::Dirty::Shaders] = false;
49
50 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
51 for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
52 if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
53 unique_hashes[index] = 0;
54 continue;
55 }
56 const auto& shader_config{maxwell3d.regs.shader_config[index]};
57 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
58 const GPUVAddr shader_addr{base_addr + shader_config.offset};
59 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
60 if (!cpu_shader_addr) {
61 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
62 last_shaders_valid = false;
63 return false;
64 }
65 const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
66 if (!shader_info) {
67 const u32 start_address{shader_config.offset};
68 GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
69 shader_info = MakeShaderInfo(env, *cpu_shader_addr);
70 }
71 shader_infos[index] = shader_info;
72 unique_hashes[index] = shader_info->unique_hash;
73 }
74 last_shaders_valid = true;
75 return true;
76}
77
78const ShaderInfo* ShaderCache::ComputeShader() {
79 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
80 const auto& qmd{kepler_compute.launch_description};
81 const GPUVAddr shader_addr{program_base + qmd.program_start};
82 const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
83 if (!cpu_shader_addr) {
84 LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
85 return nullptr;
86 }
87 if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
88 return shader;
89 }
90 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
91 return MakeShaderInfo(env, *cpu_shader_addr);
92}
93
94ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
95 std::scoped_lock lock{lookup_mutex};
96
97 const auto it = lookup_cache.find(addr);
98 if (it == lookup_cache.end()) {
99 return nullptr;
100 }
101 return it->second->data;
102}
103
104void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
105 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
106
107 const VAddr addr_end = addr + size;
108 Entry* const entry = NewEntry(addr, addr_end, data.get());
109
110 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
111 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
112 invalidation_cache[page].push_back(entry);
113 }
114
115 storage.push_back(std::move(data));
116
117 rasterizer.UpdatePagesCachedCount(addr, size, 1);
118}
119
120void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
121 const VAddr addr_end = addr + size;
122 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
123 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
124 auto it = invalidation_cache.find(page);
125 if (it == invalidation_cache.end()) {
126 continue;
127 }
128 InvalidatePageEntries(it->second, addr, addr_end);
129 }
130}
131
132void ShaderCache::RemovePendingShaders() {
133 if (marked_for_removal.empty()) {
134 return;
135 }
136 // Remove duplicates
137 std::ranges::sort(marked_for_removal);
138 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
139 marked_for_removal.end());
140
141 std::vector<ShaderInfo*> removed_shaders;
142 removed_shaders.reserve(marked_for_removal.size());
143
144 std::scoped_lock lock{lookup_mutex};
145
146 for (Entry* const entry : marked_for_removal) {
147 removed_shaders.push_back(entry->data);
148
149 const auto it = lookup_cache.find(entry->addr_start);
150 ASSERT(it != lookup_cache.end());
151 lookup_cache.erase(it);
152 }
153 marked_for_removal.clear();
154
155 if (!removed_shaders.empty()) {
156 RemoveShadersFromStorage(std::move(removed_shaders));
157 }
158}
159
160void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
161 size_t index = 0;
162 while (index < entries.size()) {
163 Entry* const entry = entries[index];
164 if (!entry->Overlaps(addr, addr_end)) {
165 ++index;
166 continue;
167 }
168
169 UnmarkMemory(entry);
170 RemoveEntryFromInvalidationCache(entry);
171 marked_for_removal.push_back(entry);
172 }
173}
174
175void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
176 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
177 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
178 const auto entries_it = invalidation_cache.find(page);
179 ASSERT(entries_it != invalidation_cache.end());
180 std::vector<Entry*>& entries = entries_it->second;
181
182 const auto entry_it = std::ranges::find(entries, entry);
183 ASSERT(entry_it != entries.end());
184 entries.erase(entry_it);
185 }
186}
187
188void ShaderCache::UnmarkMemory(Entry* entry) {
189 if (!entry->is_memory_marked) {
190 return;
191 }
192 entry->is_memory_marked = false;
193
194 const VAddr addr = entry->addr_start;
195 const size_t size = entry->addr_end - addr;
196 rasterizer.UpdatePagesCachedCount(addr, size, -1);
197}
198
199void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
200 // Remove them from the cache
201 std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
202 return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
203 });
204}
205
206ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
207 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
208 Entry* const entry_pointer = entry.get();
209
210 lookup_cache.emplace(addr, std::move(entry));
211 return entry_pointer;
212}
213
214const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
215 auto info = std::make_unique<ShaderInfo>();
216 if (const std::optional<u64> cached_hash{env.Analyze()}) {
217 info->unique_hash = *cached_hash;
218 info->size_bytes = env.CachedSize();
219 } else {
220        // Slow path, rarely hit in commercial games
221 // Build a control flow graph to get the real shader size
222 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
223 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
224 info->unique_hash = env.CalculateHash();
225 info->size_bytes = env.ReadSize();
226 }
227 const size_t size_bytes{info->size_bytes};
228 const ShaderInfo* const result{info.get()};
229 Register(std::move(info), cpu_addr, size_bytes);
230 return result;
231}
232
233} // namespace VideoCommon
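
The de-templated cache exposes its functionality to backends through the protected RefreshStages/ComputeShader helpers and the shader_infos array. A minimal sketch of how a derived backend cache might drive them, assuming only the constructor signature shown in this diff (MyShaderCache and its member functions are hypothetical, not part of this change):

#include <array>

#include "video_core/shader_cache.h"

class MyShaderCache : public VideoCommon::ShaderCache {
public:
    explicit MyShaderCache(VideoCore::RasterizerInterface& rasterizer,
                           Tegra::MemoryManager& gpu_memory,
                           Tegra::Engines::Maxwell3D& maxwell3d,
                           Tegra::Engines::KeplerCompute& kepler_compute)
        : VideoCommon::ShaderCache{rasterizer, gpu_memory, maxwell3d, kepler_compute} {}

    void DrawSketch() {
        std::array<u64, 6> unique_hashes{};
        if (!RefreshStages(unique_hashes)) {
            return; // A stage had an invalid GPU address; skip this draw
        }
        for (size_t index = 0; index < unique_hashes.size(); ++index) {
            if (unique_hashes[index] != 0) {
                // shader_infos[index] now points at a registered ShaderInfo;
                // key a graphics pipeline on the collected hashes here.
            }
        }
    }

    void DispatchSketch() {
        if (const VideoCommon::ShaderInfo* const info = ComputeShader()) {
            // Key a compute pipeline on info->unique_hash here.
        }
    }
};
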
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..89a4bcc84 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,20 +4,28 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
8#include <memory> 7#include <memory>
9#include <mutex> 8#include <mutex>
10#include <unordered_map> 9#include <unordered_map>
11#include <utility> 10#include <utility>
12#include <vector> 11#include <vector>
13 12
14#include "common/assert.h"
15#include "common/common_types.h" 13#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
17 15
16namespace Tegra {
17class MemoryManager;
18}
19
18namespace VideoCommon { 20namespace VideoCommon {
19 21
20template <class T> 22class GenericEnvironment;
23
24struct ShaderInfo {
25 u64 unique_hash{};
26 size_t size_bytes{};
27};
28
21class ShaderCache { 29class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14; 30 static constexpr u64 PAGE_BITS = 14;
23 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; 31 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
@@ -25,206 +33,100 @@ class ShaderCache {
25 struct Entry { 33 struct Entry {
26 VAddr addr_start; 34 VAddr addr_start;
27 VAddr addr_end; 35 VAddr addr_end;
28 T* data; 36 ShaderInfo* data;
29 37
30 bool is_memory_marked = true; 38 bool is_memory_marked = true;
31 39
32 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { 40 bool Overlaps(VAddr start, VAddr end) const noexcept {
33 return start < addr_end && addr_start < end; 41 return start < addr_end && addr_start < end;
34 } 42 }
35 }; 43 };
36 44
37public: 45public:
38 virtual ~ShaderCache() = default;
39
40 /// @brief Removes shaders inside a given region 46 /// @brief Removes shaders inside a given region
41 /// @note Checks for ranges 47 /// @note Checks for ranges
42 /// @param addr Start address of the invalidation 48 /// @param addr Start address of the invalidation
43 /// @param size Number of bytes of the invalidation 49 /// @param size Number of bytes of the invalidation
44 void InvalidateRegion(VAddr addr, std::size_t size) { 50 void InvalidateRegion(VAddr addr, size_t size);
45 std::scoped_lock lock{invalidation_mutex};
46 InvalidatePagesInRegion(addr, size);
47 RemovePendingShaders();
48 }
49 51
50 /// @brief Unmarks a memory region as cached and marks it for removal 52 /// @brief Unmarks a memory region as cached and marks it for removal
51 /// @param addr Start address of the CPU write operation 53 /// @param addr Start address of the CPU write operation
52 /// @param size Number of bytes of the CPU write operation 54 /// @param size Number of bytes of the CPU write operation
53 void OnCPUWrite(VAddr addr, std::size_t size) { 55 void OnCPUWrite(VAddr addr, size_t size);
54 std::lock_guard lock{invalidation_mutex};
55 InvalidatePagesInRegion(addr, size);
56 }
57 56
58 /// @brief Flushes delayed removal operations 57 /// @brief Flushes delayed removal operations
59 void SyncGuestHost() { 58 void SyncGuestHost();
60 std::scoped_lock lock{invalidation_mutex}; 59
61 RemovePendingShaders(); 60protected:
62 } 61 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
62 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
63 Tegra::Engines::KeplerCompute& kepler_compute_);
64
65 /// @brief Update the hashes and information of shader stages
66    /// @param unique_hashes Output array written with the hash of each enabled stage
67    /// @return True on success, false on error
68 bool RefreshStages(std::array<u64, 6>& unique_hashes);
69
70 /// @brief Returns information about the current compute shader
71 /// @return Pointer to a valid shader, nullptr on error
72 const ShaderInfo* ComputeShader();
73
74 Tegra::MemoryManager& gpu_memory;
75 Tegra::Engines::Maxwell3D& maxwell3d;
76 Tegra::Engines::KeplerCompute& kepler_compute;
63 77
78 std::array<const ShaderInfo*, 6> shader_infos{};
79 bool last_shaders_valid = false;
80
81private:
64    /// @brief Tries to obtain a cached shader starting at a given address 82    /// @brief Tries to obtain a cached shader starting at a given address
65 /// @note Doesn't check for ranges, the given address has to be the start of the shader 83 /// @note Doesn't check for ranges, the given address has to be the start of the shader
66    /// @param addr Start address of the shader; the lookup is exact, not by region 84    /// @param addr Start address of the shader; the lookup is exact, not by region
67 /// @return Pointer to a valid shader, nullptr when nothing is found 85 /// @return Pointer to a valid shader, nullptr when nothing is found
68 T* TryGet(VAddr addr) const { 86 ShaderInfo* TryGet(VAddr addr) const;
69 std::scoped_lock lock{lookup_mutex};
70
71 const auto it = lookup_cache.find(addr);
72 if (it == lookup_cache.end()) {
73 return nullptr;
74 }
75 return it->second->data;
76 }
77
78protected:
79 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
80 87
81 /// @brief Register in the cache a given entry 88 /// @brief Register in the cache a given entry
82 /// @param data Shader to store in the cache 89 /// @param data Shader to store in the cache
83 /// @param addr Start address of the shader that will be registered 90 /// @param addr Start address of the shader that will be registered
84 /// @param size Size in bytes of the shader 91 /// @param size Size in bytes of the shader
85 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { 92 void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
86 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
87
88 const VAddr addr_end = addr + size;
89 Entry* const entry = NewEntry(addr, addr_end, data.get());
90
91 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
92 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
93 invalidation_cache[page].push_back(entry);
94 }
95
96 storage.push_back(std::move(data));
97 93
98 rasterizer.UpdatePagesCachedCount(addr, size, 1);
99 }
100
101 /// @brief Called when a shader is going to be removed
102 /// @param shader Shader that will be removed
103 /// @pre invalidation_cache is locked
104 /// @pre lookup_mutex is locked
105 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
106
107private:
108 /// @brief Invalidate pages in a given region 94 /// @brief Invalidate pages in a given region
109 /// @pre invalidation_mutex is locked 95 /// @pre invalidation_mutex is locked
110 void InvalidatePagesInRegion(VAddr addr, std::size_t size) { 96 void InvalidatePagesInRegion(VAddr addr, size_t size);
111 const VAddr addr_end = addr + size;
112 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
113 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
114 auto it = invalidation_cache.find(page);
115 if (it == invalidation_cache.end()) {
116 continue;
117 }
118 InvalidatePageEntries(it->second, addr, addr_end);
119 }
120 }
121 97
122 /// @brief Remove shaders marked for deletion 98 /// @brief Remove shaders marked for deletion
123 /// @pre invalidation_mutex is locked 99 /// @pre invalidation_mutex is locked
124 void RemovePendingShaders() { 100 void RemovePendingShaders();
125 if (marked_for_removal.empty()) {
126 return;
127 }
128 // Remove duplicates
129 std::sort(marked_for_removal.begin(), marked_for_removal.end());
130 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
131 marked_for_removal.end());
132
133 std::vector<T*> removed_shaders;
134 removed_shaders.reserve(marked_for_removal.size());
135
136 std::scoped_lock lock{lookup_mutex};
137
138 for (Entry* const entry : marked_for_removal) {
139 removed_shaders.push_back(entry->data);
140
141 const auto it = lookup_cache.find(entry->addr_start);
142 ASSERT(it != lookup_cache.end());
143 lookup_cache.erase(it);
144 }
145 marked_for_removal.clear();
146
147 if (!removed_shaders.empty()) {
148 RemoveShadersFromStorage(std::move(removed_shaders));
149 }
150 }
151 101
152 /// @brief Invalidates entries in a given range for the passed page 102 /// @brief Invalidates entries in a given range for the passed page
153 /// @param entries Vector of entries in the page, it will be modified on overlaps 103 /// @param entries Vector of entries in the page, it will be modified on overlaps
154 /// @param addr Start address of the invalidation 104 /// @param addr Start address of the invalidation
155 /// @param addr_end Non-inclusive end address of the invalidation 105 /// @param addr_end Non-inclusive end address of the invalidation
156 /// @pre invalidation_mutex is locked 106 /// @pre invalidation_mutex is locked
157 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { 107 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
158 std::size_t index = 0;
159 while (index < entries.size()) {
160 Entry* const entry = entries[index];
161 if (!entry->Overlaps(addr, addr_end)) {
162 ++index;
163 continue;
164 }
165
166 UnmarkMemory(entry);
167 RemoveEntryFromInvalidationCache(entry);
168 marked_for_removal.push_back(entry);
169 }
170 }
171 108
172 /// @brief Removes all references to an entry in the invalidation cache 109 /// @brief Removes all references to an entry in the invalidation cache
173 /// @param entry Entry to remove from the invalidation cache 110 /// @param entry Entry to remove from the invalidation cache
174 /// @pre invalidation_mutex is locked 111 /// @pre invalidation_mutex is locked
175 void RemoveEntryFromInvalidationCache(const Entry* entry) { 112 void RemoveEntryFromInvalidationCache(const Entry* entry);
176 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
177 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
178 const auto entries_it = invalidation_cache.find(page);
179 ASSERT(entries_it != invalidation_cache.end());
180 std::vector<Entry*>& entries = entries_it->second;
181
182 const auto entry_it = std::find(entries.begin(), entries.end(), entry);
183 ASSERT(entry_it != entries.end());
184 entries.erase(entry_it);
185 }
186 }
187 113
188 /// @brief Unmarks an entry from the rasterizer cache 114 /// @brief Unmarks an entry from the rasterizer cache
189 /// @param entry Entry to unmark from memory 115 /// @param entry Entry to unmark from memory
190 void UnmarkMemory(Entry* entry) { 116 void UnmarkMemory(Entry* entry);
191 if (!entry->is_memory_marked) {
192 return;
193 }
194 entry->is_memory_marked = false;
195
196 const VAddr addr = entry->addr_start;
197 const std::size_t size = entry->addr_end - addr;
198 rasterizer.UpdatePagesCachedCount(addr, size, -1);
199 }
200 117
201 /// @brief Removes a vector of shaders from a list 118 /// @brief Removes a vector of shaders from a list
202 /// @param removed_shaders Shaders to be removed from the storage 119 /// @param removed_shaders Shaders to be removed from the storage
203 /// @pre invalidation_mutex is locked 120 /// @pre invalidation_mutex is locked
204 /// @pre lookup_mutex is locked 121 /// @pre lookup_mutex is locked
205 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { 122 void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
206 // Notify removals
207 for (T* const shader : removed_shaders) {
208 OnShaderRemoval(shader);
209 }
210
211 // Remove them from the cache
212 const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
213 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
214 removed_shaders.end();
215 };
216 std::erase_if(storage, is_removed);
217 }
218 123
219 /// @brief Creates a new entry in the lookup cache and returns its pointer 124 /// @brief Creates a new entry in the lookup cache and returns its pointer
220 /// @pre lookup_mutex is locked 125 /// @pre lookup_mutex is locked
221 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { 126 Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
222 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
223 Entry* const entry_pointer = entry.get();
224 127
225 lookup_cache.emplace(addr, std::move(entry)); 128 /// @brief Create a new shader entry and register it
226 return entry_pointer; 129 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
227 }
228 130
229 VideoCore::RasterizerInterface& rasterizer; 131 VideoCore::RasterizerInterface& rasterizer;
230 132
@@ -233,7 +135,7 @@ private:
233 135
234 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; 136 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
235 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; 137 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
236 std::vector<std::unique_ptr<T>> storage; 138 std::vector<std::unique_ptr<ShaderInfo>> storage;
237 std::vector<Entry*> marked_for_removal; 139 std::vector<Entry*> marked_for_removal;
238}; 140};
239 141
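
Register and InvalidatePagesInRegion bucket entries by 16 KiB page (PAGE_BITS = 14), so an invalidation only scans the pages the write actually touches. A worked example of the page-range arithmetic above, using standard types for self-containment:

#include <cstdint>

// Worked example of the page bucketing (PAGE_BITS = 14, so pages are 16 KiB).
constexpr std::uint64_t PAGE_BITS = 14;
constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

// A shader registered at addr = 0x7000 with size = 0x5000 ends at 0xC000.
constexpr std::uint64_t addr = 0x7000;
constexpr std::uint64_t addr_end = addr + 0x5000; // 0xC000
constexpr std::uint64_t first_page = addr >> PAGE_BITS;                      // 1
constexpr std::uint64_t page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;  // 3
// The entry is pushed into invalidation_cache pages [1, 3), i.e. pages 1 and 2,
// so a CPU write to any byte of either page will find and invalidate it.
static_assert(first_page == 1 && page_end == 3);
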
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..5dccc0097
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,453 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <filesystem>
6#include <fstream>
7#include <memory>
8#include <optional>
9#include <utility>
10
11#include "common/assert.h"
12#include "common/cityhash.h"
13#include "common/common_types.h"
14#include "common/div_ceil.h"
15#include "common/fs/fs.h"
16#include "common/logging/log.h"
17#include "shader_recompiler/environment.h"
18#include "video_core/memory_manager.h"
19#include "video_core/shader_environment.h"
20#include "video_core/textures/texture.h"
21
22namespace VideoCommon {
23
24constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
25constexpr u32 CACHE_VERSION = 3;
26
27constexpr size_t INST_SIZE = sizeof(u64);
28
29using Maxwell = Tegra::Engines::Maxwell3D::Regs;
30
31static u64 MakeCbufKey(u32 index, u32 offset) {
32 return (static_cast<u64>(index) << 32) | offset;
33}
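
Both ReadCbufValue overrides cache their results in cbuf_values keyed by this packing: the constant-buffer index occupies the high 32 bits and the byte offset the low 32 bits, so distinct (index, offset) pairs cannot collide. A quick check of the packing:

#include <cstdint>

// Worked example of MakeCbufKey's packing for index 3, offset 0x10.
constexpr std::uint64_t key = (std::uint64_t{3} << 32) | 0x10;
static_assert(key == 0x0000000300000010ULL);
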
34
35static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
36 switch (entry.texture_type) {
37 case Tegra::Texture::TextureType::Texture1D:
38 return Shader::TextureType::Color1D;
39 case Tegra::Texture::TextureType::Texture2D:
40 case Tegra::Texture::TextureType::Texture2DNoMipmap:
41 return Shader::TextureType::Color2D;
42 case Tegra::Texture::TextureType::Texture3D:
43 return Shader::TextureType::Color3D;
44 case Tegra::Texture::TextureType::TextureCubemap:
45 return Shader::TextureType::ColorCube;
46 case Tegra::Texture::TextureType::Texture1DArray:
47 return Shader::TextureType::ColorArray1D;
48 case Tegra::Texture::TextureType::Texture2DArray:
49 return Shader::TextureType::ColorArray2D;
50 case Tegra::Texture::TextureType::Texture1DBuffer:
51 return Shader::TextureType::Buffer;
52 case Tegra::Texture::TextureType::TextureCubeArray:
53 return Shader::TextureType::ColorArrayCube;
54 default:
55 throw Shader::NotImplementedException("Unknown texture type");
56 }
57}
58
59GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
60 u32 start_address_)
61 : gpu_memory{&gpu_memory_}, program_base{program_base_} {
62 start_address = start_address_;
63}
64
65GenericEnvironment::~GenericEnvironment() = default;
66
67u32 GenericEnvironment::TextureBoundBuffer() const {
68 return texture_bound;
69}
70
71u32 GenericEnvironment::LocalMemorySize() const {
72 return local_memory_size;
73}
74
75u32 GenericEnvironment::SharedMemorySize() const {
76 return shared_memory_size;
77}
78
79std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
80 return workgroup_size;
81}
82
83u64 GenericEnvironment::ReadInstruction(u32 address) {
84 read_lowest = std::min(read_lowest, address);
85 read_highest = std::max(read_highest, address);
86
87 if (address >= cached_lowest && address < cached_highest) {
88 return code[(address - cached_lowest) / INST_SIZE];
89 }
90 has_unbound_instructions = true;
91 return gpu_memory->Read<u64>(program_base + address);
92}
93
94std::optional<u64> GenericEnvironment::Analyze() {
95 const std::optional<u64> size{TryFindSize()};
96 if (!size) {
97 return std::nullopt;
98 }
99 cached_lowest = start_address;
100 cached_highest = start_address + static_cast<u32>(*size);
101 return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
102}
103
104void GenericEnvironment::SetCachedSize(size_t size_bytes) {
105 cached_lowest = start_address;
106 cached_highest = start_address + static_cast<u32>(size_bytes);
107 code.resize(CachedSize());
108 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
109}
110
111size_t GenericEnvironment::CachedSize() const noexcept {
112 return cached_highest - cached_lowest + INST_SIZE;
113}
114
115size_t GenericEnvironment::ReadSize() const noexcept {
116 return read_highest - read_lowest + INST_SIZE;
117}
118
119bool GenericEnvironment::CanBeSerialized() const noexcept {
120 return !has_unbound_instructions;
121}
122
123u64 GenericEnvironment::CalculateHash() const {
124 const size_t size{ReadSize()};
125 const auto data{std::make_unique<char[]>(size)};
126 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
127 return Common::CityHash64(data.get(), size);
128}
129
130void GenericEnvironment::Serialize(std::ofstream& file) const {
131 const u64 code_size{static_cast<u64>(CachedSize())};
132 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
133 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
134
135 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
136 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
137 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
138 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
139 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
140 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
141 .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
142 .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
143 .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
144 .write(reinterpret_cast<const char*>(code.data()), code_size);
145 for (const auto [key, type] : texture_types) {
146 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
147 .write(reinterpret_cast<const char*>(&type), sizeof(type));
148 }
149 for (const auto [key, type] : cbuf_values) {
150 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
151 .write(reinterpret_cast<const char*>(&type), sizeof(type));
152 }
153 if (stage == Shader::Stage::Compute) {
154 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
155 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
156 } else {
157 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
158 }
159}
160
161std::optional<u64> GenericEnvironment::TryFindSize() {
162 static constexpr size_t BLOCK_SIZE = 0x1000;
163 static constexpr size_t MAXIMUM_SIZE = 0x100000;
164
165 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
166 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
167
168 GPUVAddr guest_addr{program_base + start_address};
169 size_t offset{0};
170 size_t size{BLOCK_SIZE};
171 while (size <= MAXIMUM_SIZE) {
172 code.resize(size / INST_SIZE);
173 u64* const data = code.data() + offset / INST_SIZE;
174 gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
175 for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
176 const u64 inst = data[index / INST_SIZE];
177 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
178 return offset + index;
179 }
180 }
181 guest_addr += BLOCK_SIZE;
182 size += BLOCK_SIZE;
183 offset += BLOCK_SIZE;
184 }
185 return std::nullopt;
186}
187
188Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
189 bool via_header_index, u32 raw) {
190 const TextureHandle handle{raw, via_header_index};
191 const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
192 Tegra::Texture::TICEntry entry;
193 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
194 const Shader::TextureType result{ConvertType(entry)};
195 texture_types.emplace(raw, result);
196 return result;
197}
198
199GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
200 Tegra::MemoryManager& gpu_memory_,
201 Maxwell::ShaderProgram program, GPUVAddr program_base_,
202 u32 start_address_)
203 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
204 gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
205 switch (program) {
206 case Maxwell::ShaderProgram::VertexA:
207 stage = Shader::Stage::VertexA;
208 stage_index = 0;
209 break;
210 case Maxwell::ShaderProgram::VertexB:
211 stage = Shader::Stage::VertexB;
212 stage_index = 0;
213 break;
214 case Maxwell::ShaderProgram::TesselationControl:
215 stage = Shader::Stage::TessellationControl;
216 stage_index = 1;
217 break;
218 case Maxwell::ShaderProgram::TesselationEval:
219 stage = Shader::Stage::TessellationEval;
220 stage_index = 2;
221 break;
222 case Maxwell::ShaderProgram::Geometry:
223 stage = Shader::Stage::Geometry;
224 stage_index = 3;
225 break;
226 case Maxwell::ShaderProgram::Fragment:
227 stage = Shader::Stage::Fragment;
228 stage_index = 4;
229 break;
230 default:
231 UNREACHABLE_MSG("Invalid program={}", program);
232 break;
233 }
234 const u64 local_size{sph.LocalMemorySize()};
235 ASSERT(local_size <= std::numeric_limits<u32>::max());
236 local_memory_size = static_cast<u32>(local_size);
237 texture_bound = maxwell3d->regs.tex_cb_index;
238}
239
240u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
241 const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
242 ASSERT(cbuf.enabled);
243 u32 value{};
244 if (cbuf_offset < cbuf.size) {
245 value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
246 }
247 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
248 return value;
249}
250
251Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
252 const auto& regs{maxwell3d->regs};
253 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
254 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
255}
256
257ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
258 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
259 u32 start_address_)
260    : GenericEnvironment{gpu_memory_, program_base_, start_address_},
261      kepler_compute{&kepler_compute_} {
262 const auto& qmd{kepler_compute->launch_description};
263 stage = Shader::Stage::Compute;
264 local_memory_size = qmd.local_pos_alloc;
265 texture_bound = kepler_compute->regs.tex_cb_index;
266 shared_memory_size = qmd.shared_alloc;
267 workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
268}
269
270u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
271 const auto& qmd{kepler_compute->launch_description};
272 ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
273 const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
274 u32 value{};
275 if (cbuf_offset < cbuf.size) {
276 value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
277 }
278 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
279 return value;
280}
281
282Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
283 const auto& regs{kepler_compute->regs};
284 const auto& qmd{kepler_compute->launch_description};
285 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
286}
287
288void FileEnvironment::Deserialize(std::ifstream& file) {
289 u64 code_size{};
290 u64 num_texture_types{};
291 u64 num_cbuf_values{};
292 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
293 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
294 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
295 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
296 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
297 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
298 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
299 .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
300 .read(reinterpret_cast<char*>(&stage), sizeof(stage));
301 code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
302 file.read(reinterpret_cast<char*>(code.get()), code_size);
303 for (size_t i = 0; i < num_texture_types; ++i) {
304 u32 key;
305 Shader::TextureType type;
306 file.read(reinterpret_cast<char*>(&key), sizeof(key))
307 .read(reinterpret_cast<char*>(&type), sizeof(type));
308 texture_types.emplace(key, type);
309 }
310 for (size_t i = 0; i < num_cbuf_values; ++i) {
311 u64 key;
312 u32 value;
313 file.read(reinterpret_cast<char*>(&key), sizeof(key))
314 .read(reinterpret_cast<char*>(&value), sizeof(value));
315 cbuf_values.emplace(key, value);
316 }
317 if (stage == Shader::Stage::Compute) {
318 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
319 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
320 } else {
321 file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
322 }
323}
324
325u64 FileEnvironment::ReadInstruction(u32 address) {
326 if (address < read_lowest || address > read_highest) {
327 throw Shader::LogicError("Out of bounds address {}", address);
328 }
329 return code[(address - read_lowest) / sizeof(u64)];
330}
331
332u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
333 const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
334 if (it == cbuf_values.end()) {
335        throw Shader::LogicError("Uncached constant buffer value");
336 }
337 return it->second;
338}
339
340Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
341 const auto it{texture_types.find(handle)};
342 if (it == texture_types.end()) {
343 throw Shader::LogicError("Uncached read texture type");
344 }
345 return it->second;
346}
347
348u32 FileEnvironment::LocalMemorySize() const {
349 return local_memory_size;
350}
351
352u32 FileEnvironment::SharedMemorySize() const {
353 return shared_memory_size;
354}
355
356u32 FileEnvironment::TextureBoundBuffer() const {
357 return texture_bound;
358}
359
360std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
361 return workgroup_size;
362}
363
364void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
365 const std::filesystem::path& filename) try {
366 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
367 file.exceptions(std::ifstream::failbit);
368 if (!file.is_open()) {
369 LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
370 Common::FS::PathToUTF8String(filename));
371 return;
372 }
373 if (file.tellp() == 0) {
374 // Write header
375 file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
376 .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
377 }
378 if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
379 return;
380 }
381 const u32 num_envs{static_cast<u32>(envs.size())};
382 file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
383 for (const GenericEnvironment* const env : envs) {
384 env->Serialize(file);
385 }
386 file.write(key.data(), key.size_bytes());
387
388} catch (const std::ios_base::failure& e) {
389 LOG_ERROR(Common_Filesystem, "{}", e.what());
390 if (!Common::FS::RemoveFile(filename)) {
391 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
392 Common::FS::PathToUTF8String(filename));
393 }
394}
395
396void LoadPipelines(
397 std::stop_token stop_loading, const std::filesystem::path& filename,
398 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
399 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
400 std::ifstream file(filename, std::ios::binary | std::ios::ate);
401 if (!file.is_open()) {
402 return;
403 }
404 file.exceptions(std::ifstream::failbit);
405 const auto end{file.tellg()};
406 file.seekg(0, std::ios::beg);
407
408 std::array<char, 8> magic_number;
409 u32 cache_version;
410 file.read(magic_number.data(), magic_number.size())
411 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
412 if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
413 file.close();
414 if (Common::FS::RemoveFile(filename)) {
415 if (magic_number != MAGIC_NUMBER) {
416 LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
417 }
418 if (cache_version != CACHE_VERSION) {
419 LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
420 }
421 } else {
422 LOG_ERROR(Common_Filesystem,
423 "Invalid pipeline cache file and failed to delete it in \"{}\"",
424 Common::FS::PathToUTF8String(filename));
425 }
426 return;
427 }
428 while (file.tellg() != end) {
429 if (stop_loading.stop_requested()) {
430 return;
431 }
432 u32 num_envs{};
433 file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
434 std::vector<FileEnvironment> envs(num_envs);
435 for (FileEnvironment& env : envs) {
436 env.Deserialize(file);
437 }
438 if (envs.front().ShaderStage() == Shader::Stage::Compute) {
439 load_compute(file, std::move(envs.front()));
440 } else {
441 load_graphics(file, std::move(envs));
442 }
443 }
444
445} catch (const std::ios_base::failure& e) {
446 LOG_ERROR(Common_Filesystem, "{}", e.what());
447 if (!Common::FS::RemoveFile(filename)) {
448 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
449 Common::FS::PathToUTF8String(filename));
450 }
451}
452
453} // namespace VideoCommon
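
A hypothetical caller sketch for this pair, showing the shape the Vulkan pipeline cache in this commit follows; the key type, function names, and callback bodies here are illustrative only. Note that SerializePipeline writes the key after the environments, so the load callbacks receive the stream positioned at the key and read it themselves:

#include <array>
#include <filesystem>
#include <fstream>
#include <stop_token>
#include <vector>

#include "video_core/shader_environment.h"

// Hypothetical pipeline key; must be trivially copyable with unique object
// representations to satisfy the templated SerializePipeline overload.
struct GraphicsKey {
    std::array<u64, 6> unique_hashes{};
};

void SaveEntry(const GraphicsKey& key,
               const std::vector<const VideoCommon::GenericEnvironment*>& envs,
               const std::filesystem::path& path) {
    // Appends one record: u32 environment count, each serialized environment,
    // then the raw key bytes.
    VideoCommon::SerializePipeline(key, envs, path);
}

void LoadAll(std::stop_token stop, const std::filesystem::path& path) {
    VideoCommon::LoadPipelines(
        stop, path,
        [](std::ifstream& file, VideoCommon::FileEnvironment env) {
            // Compute record: read the key from 'file', then build from 'env'.
        },
        [](std::ifstream& file, std::vector<VideoCommon::FileEnvironment> envs) {
            // Graphics record: read the key, then build one pipeline from all
            // of the per-stage environments.
        });
}
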
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..37d712045
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,198 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <filesystem>
10#include <iosfwd>
11#include <limits>
12#include <memory>
13#include <optional>
14#include <span>
15#include <type_traits>
16#include <unordered_map>
17#include <vector>
18
19#include "common/common_types.h"
20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25
26namespace Tegra {
27class MemoryManager;
28}
29
30namespace VideoCommon {
31
32struct TextureHandle {
33 explicit TextureHandle(u32 data, bool via_header_index) {
34 if (via_header_index) {
35 image = data;
36 sampler = data;
37 } else {
38 const Tegra::Texture::TextureHandle handle{data};
39 image = handle.tic_id;
40            sampler = handle.tsc_id.Value();
41 }
42 }
43
44 u32 image;
45 u32 sampler;
46};
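
When via_header_index is false, the raw bound handle is split into separate image (TIC) and sampler (TSC) descriptor indices. A worked decode, assuming the usual Maxwell packing of tic_id in the low 20 bits and tsc_id in the 12 bits above it (the raw value here is hypothetical):

#include <cstdint>

constexpr std::uint32_t raw = 0x00300005;              // hypothetical bound handle
constexpr std::uint32_t tic_id = raw & 0xFFFFF;        // 0x5 -> image descriptor
constexpr std::uint32_t tsc_id = (raw >> 20) & 0xFFF;  // 0x3 -> sampler descriptor
static_assert(tic_id == 0x5 && tsc_id == 0x3);
// With via_header_index set, both indices would instead be the raw value.
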
47
48class GenericEnvironment : public Shader::Environment {
49public:
50 explicit GenericEnvironment() = default;
51 explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
52 u32 start_address_);
53
54 ~GenericEnvironment() override;
55
56 [[nodiscard]] u32 TextureBoundBuffer() const final;
57
58 [[nodiscard]] u32 LocalMemorySize() const final;
59
60 [[nodiscard]] u32 SharedMemorySize() const final;
61
62 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
63
64 [[nodiscard]] u64 ReadInstruction(u32 address) final;
65
66 [[nodiscard]] std::optional<u64> Analyze();
67
68 void SetCachedSize(size_t size_bytes);
69
70 [[nodiscard]] size_t CachedSize() const noexcept;
71
72 [[nodiscard]] size_t ReadSize() const noexcept;
73
74 [[nodiscard]] bool CanBeSerialized() const noexcept;
75
76 [[nodiscard]] u64 CalculateHash() const;
77
78 void Serialize(std::ofstream& file) const;
79
80protected:
81 std::optional<u64> TryFindSize();
82
83 Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
84 u32 raw);
85
86 Tegra::MemoryManager* gpu_memory{};
87 GPUVAddr program_base{};
88
89 std::vector<u64> code;
90 std::unordered_map<u32, Shader::TextureType> texture_types;
91 std::unordered_map<u64, u32> cbuf_values;
92
93 u32 local_memory_size{};
94 u32 texture_bound{};
95 u32 shared_memory_size{};
96 std::array<u32, 3> workgroup_size{};
97
98 u32 read_lowest = std::numeric_limits<u32>::max();
99 u32 read_highest = 0;
100
101 u32 cached_lowest = std::numeric_limits<u32>::max();
102 u32 cached_highest = 0;
103
104 bool has_unbound_instructions = false;
105};
106
107class GraphicsEnvironment final : public GenericEnvironment {
108public:
109 explicit GraphicsEnvironment() = default;
110 explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
111 Tegra::MemoryManager& gpu_memory_,
112 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
113 GPUVAddr program_base_, u32 start_address_);
114
115 ~GraphicsEnvironment() override = default;
116
117 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
118
119 Shader::TextureType ReadTextureType(u32 handle) override;
120
121private:
122 Tegra::Engines::Maxwell3D* maxwell3d{};
123 size_t stage_index{};
124};
125
126class ComputeEnvironment final : public GenericEnvironment {
127public:
128 explicit ComputeEnvironment() = default;
129 explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
130 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
131 u32 start_address_);
132
133 ~ComputeEnvironment() override = default;
134
135 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
136
137 Shader::TextureType ReadTextureType(u32 handle) override;
138
139private:
140 Tegra::Engines::KeplerCompute* kepler_compute{};
141};
142
143class FileEnvironment final : public Shader::Environment {
144public:
145 FileEnvironment() = default;
146 ~FileEnvironment() override = default;
147
148 FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
149 FileEnvironment(FileEnvironment&&) noexcept = default;
150
151 FileEnvironment& operator=(const FileEnvironment&) = delete;
152 FileEnvironment(const FileEnvironment&) = delete;
153
154 void Deserialize(std::ifstream& file);
155
156 [[nodiscard]] u64 ReadInstruction(u32 address) override;
157
158 [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
159
160 [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
161
162 [[nodiscard]] u32 LocalMemorySize() const override;
163
164 [[nodiscard]] u32 SharedMemorySize() const override;
165
166 [[nodiscard]] u32 TextureBoundBuffer() const override;
167
168 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
169
170private:
171 std::unique_ptr<u64[]> code;
172 std::unordered_map<u32, Shader::TextureType> texture_types;
173 std::unordered_map<u64, u32> cbuf_values;
174 std::array<u32, 3> workgroup_size{};
175 u32 local_memory_size{};
176 u32 shared_memory_size{};
177 u32 texture_bound{};
178 u32 read_lowest{};
179 u32 read_highest{};
180};
181
182void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
183 const std::filesystem::path& filename);
184
185template <typename Key, typename Envs>
186void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) {
187 static_assert(std::is_trivially_copyable_v<Key>);
188 static_assert(std::has_unique_object_representations_v<Key>);
189 SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
190 std::span(envs.data(), envs.size()), filename);
191}
192
193void LoadPipelines(
194 std::stop_token stop_loading, const std::filesystem::path& filename,
195 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
196 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
197
198} // namespace VideoCommon
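
For reference, the on-disk layout these two functions agree on, reconstructed from Serialize() and SerializePipeline() above:

// Sketch of the pipeline cache file layout:
//
//   char[8]  magic          "yuzucach"
//   u32      version        CACHE_VERSION (3)
//   repeated until end of file, one record per pipeline:
//     u32    num_envs
//     num_envs x environment:
//       u64 code_size, u64 num_texture_types, u64 num_cbuf_values,
//       u32 local_memory_size, u32 texture_bound, u32 start_address,
//       u32 cached_lowest, u32 cached_highest, stage,
//       code bytes, (key, type) pairs, (key, value) pairs,
//       then workgroup_size + shared_memory_size for compute, or sph otherwise
//     raw pipeline key bytes (read back by the load_* callbacks)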