author    ReinUsesLisp 2021-04-26 03:53:26 -0300
committer ameerj       2021-07-22 21:51:29 -0400
commit    025b20f96ae588777e3ff11083cc4184bf418af6 (patch)
tree      7cda9932a219409196adfc8a8d7d5793840657c1 /src
parent    vulkan: Defer descriptor set work to the Vulkan thread (diff)
shader: Move pipeline cache logic to separate files
Move code to separate files so it can be reused from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted; it is still intrusively stored inside vk_pipeline_cache and will be moved once OpenGL needs it.
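For orientation, here is a condensed sketch of the interface the shared files now expose, pieced together from the hunks below. The exact declarations live in src/video_core/shader_cache.h, which this page does not show, so access specifiers and minor details may differ:

    namespace VideoCommon {

    struct ShaderInfo {
        u64 unique_hash{};
        size_t size_bytes{};
    };

    class ShaderCache {
    public:
        explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
                             Tegra::MemoryManager& gpu_memory_,
                             Tegra::Engines::Maxwell3D& maxwell3d_,
                             Tegra::Engines::KeplerCompute& kepler_compute_);

        // Fills one u64 hash per graphics stage from the Maxwell3D state and
        // returns false when a shader address cannot be translated.
        bool RefreshStages(std::array<u64, 6>& unique_hashes);

        // Returns the ShaderInfo for the bound compute shader, creating and
        // registering it on a cache miss.
        const ShaderInfo* ComputeShader();
    };

    } // namespace VideoCommon

Both backends derive from this class: Vulkan's PipelineCache and OpenGL's ShaderCache, as the vk_pipeline_cache.h and gl_shader_cache.h hunks show.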
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/CMakeLists.txt                         |   3
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h        |   2
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp    |  21
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h      |  58
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp  | 719
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h    |  30
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp      |   2
-rw-r--r--  src/video_core/shader_cache.cpp                       | 233
-rw-r--r--  src/video_core/shader_cache.h                         | 198
-rw-r--r--  src/video_core/shader_environment.cpp                 | 453
-rw-r--r--  src/video_core/shader_environment.h                   | 198
12 files changed, 1095 insertions(+), 824 deletions(-)
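The on-disk cache format also moves behind VideoCommon (LoadPipelines/SerializePipeline in the new shader_environment files). As a rough sketch of the load path after this patch, condensed from the vk_pipeline_cache.cpp hunks below (error handling and worker bookkeeping omitted):

    // LoadPipelines walks the transferable file and calls back once per entry;
    // the backend decides how to turn each entry into a pipeline.
    const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
        ComputePipelineCacheKey key;
        file.read(reinterpret_cast<char*>(&key), sizeof(key));
        // ...queue CreateComputePipeline(pools, key, env, false) on the workers
    }};
    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
        GraphicsPipelineCacheKey key;
        file.read(reinterpret_cast<char*>(&key), sizeof(key));
        // ...queue CreateGraphicsPipeline(pools, key, envs, false) on the workers
    }};
    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);

Writing is symmetric: both backends call VideoCommon::SerializePipeline(key, envs, pipeline_cache_filename) from a serialization thread, as the hunks below show.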
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3166a69dc..6e0e4b8f5 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -145,7 +145,10 @@ add_library(video_core STATIC
     renderer_vulkan/vk_texture_cache.h
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
+    shader_cache.cpp
     shader_cache.h
+    shader_environment.cpp
+    shader_environment.h
     shader_notify.cpp
     shader_notify.h
     surface.cpp
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f58f8791..2fdcbe4ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -217,7 +217,7 @@ private:
     TextureCache texture_cache;
     BufferCacheRuntime buffer_cache_runtime;
     BufferCache buffer_cache;
-    ShaderCacheOpenGL shader_cache;
+    ShaderCache shader_cache;
     QueryCache query_cache;
     AccelerateDMA accelerate_dma;
     FenceManagerOpenGL fence_manager;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4dd166156..c3e490b40 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,18 +29,13 @@
 
 namespace OpenGL {
 
-Shader::Shader() = default;
-
-Shader::~Shader() = default;
-
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                     Tegra::Engines::Maxwell3D& maxwell3d_,
-                                     Tegra::Engines::KeplerCompute& kepler_compute_,
-                                     Tegra::MemoryManager& gpu_memory_, const Device& device_)
-    : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
-      maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
-
-ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
+ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_)
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
+
+ShaderCache::~ShaderCache() = default;
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index ad3d15a76..96520e17c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -36,27 +36,59 @@ class RasterizerOpenGL;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-class Shader {
+struct GraphicsProgramKey {
+    struct TransformFeedbackState {
+        struct Layout {
+            u32 stream;
+            u32 varying_count;
+            u32 stride;
+        };
+        std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
+        std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
+    };
+
+    std::array<u64, 6> unique_hashes;
+    std::array<u8, Maxwell::NumRenderTargets> color_formats;
+    union {
+        u32 raw;
+        BitField<0, 1, u32> xfb_enabled;
+        BitField<1, 1, u32> early_z;
+        BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
+        BitField<6, 2, u32> tessellation_primitive;
+        BitField<8, 2, u32> tessellation_spacing;
+        BitField<10, 1, u32> tessellation_clockwise;
+    };
+    u32 padding;
+    TransformFeedbackState xfb_state;
+
+    [[nodiscard]] size_t Size() const noexcept {
+        if (xfb_enabled != 0) {
+            return sizeof(GraphicsProgramKey);
+        } else {
+            return offsetof(GraphicsProgramKey, padding);
+        }
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
+
+class GraphicsProgram {
 public:
-    explicit Shader();
-    ~Shader();
+private:
 };
 
-class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
+class ShaderCache : public VideoCommon::ShaderCache {
 public:
-    explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
-                               Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
-                               Tegra::Engines::Maxwell3D& maxwell3d_,
-                               Tegra::Engines::KeplerCompute& kepler_compute_,
-                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
-    ~ShaderCacheOpenGL() override;
+    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
+                         Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_,
+                         Tegra::MemoryManager& gpu_memory_, const Device& device_);
+    ~ShaderCache();
 
 private:
     Core::Frontend::EmuWindow& emu_window;
     Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
     const Device& device;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 85e21f611..e362d13c5 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -23,7 +23,7 @@
 namespace Vulkan {
 
 struct GraphicsPipelineCacheKey {
-    std::array<u128, 6> unique_hashes;
+    std::array<u64, 6> unique_hashes;
     FixedPipelineState state;
 
     size_t Hash() const noexcept;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 9d9729022..0822862fe 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -11,7 +11,8 @@
 
 #include "common/bit_cast.h"
 #include "common/cityhash.h"
-#include "common/file_util.h"
+#include "common/fs/fs.h"
+#include "common/fs/path_util.h"
 #include "common/microprofile.h"
 #include "common/thread_worker.h"
 #include "core/core.h"
@@ -36,6 +37,7 @@
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -43,449 +45,19 @@
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
-template <typename Container>
-auto MakeSpan(Container& container) {
-    return std::span(container.data(), container.size());
-}
-
-static u64 MakeCbufKey(u32 index, u32 offset) {
-    return (static_cast<u64>(index) << 32) | offset;
-}
-
-class GenericEnvironment : public Shader::Environment {
-public:
-    explicit GenericEnvironment() = default;
-    explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : gpu_memory{&gpu_memory_}, program_base{program_base_} {
-        start_address = start_address_;
-    }
-
-    ~GenericEnvironment() override = default;
-
-    u32 TextureBoundBuffer() const final {
-        return texture_bound;
-    }
-
-    u32 LocalMemorySize() const final {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const final {
-        return shared_memory_size;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const final {
-        return workgroup_size;
-    }
-
-    u64 ReadInstruction(u32 address) final {
-        read_lowest = std::min(read_lowest, address);
-        read_highest = std::max(read_highest, address);
-
-        if (address >= cached_lowest && address < cached_highest) {
-            return code[(address - cached_lowest) / INST_SIZE];
-        }
-        has_unbound_instructions = true;
-        return gpu_memory->Read<u64>(program_base + address);
-    }
-
-    std::optional<u128> Analyze() {
-        const std::optional<u64> size{TryFindSize()};
-        if (!size) {
-            return std::nullopt;
-        }
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(*size);
-        return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
-    }
-
-    void SetCachedSize(size_t size_bytes) {
-        cached_lowest = start_address;
-        cached_highest = start_address + static_cast<u32>(size_bytes);
-        code.resize(CachedSize());
-        gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
-    }
-
-    [[nodiscard]] size_t CachedSize() const noexcept {
-        return cached_highest - cached_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] size_t ReadSize() const noexcept {
-        return read_highest - read_lowest + INST_SIZE;
-    }
-
-    [[nodiscard]] bool CanBeSerialized() const noexcept {
-        return !has_unbound_instructions;
-    }
-
-    [[nodiscard]] u128 CalculateHash() const {
-        const size_t size{ReadSize()};
-        const auto data{std::make_unique<char[]>(size)};
-        gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
-        return Common::CityHash128(data.get(), size);
-    }
-
-    void Serialize(std::ofstream& file) const {
-        const u64 code_size{static_cast<u64>(CachedSize())};
-        const u64 num_texture_types{static_cast<u64>(texture_types.size())};
-        const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
-
-        file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
-            .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
-            .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
-            .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
-            .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
-            .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
-            .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
-            .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
-            .write(reinterpret_cast<const char*>(code.data()), code_size);
-        for (const auto [key, type] : texture_types) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        for (const auto [key, type] : cbuf_values) {
-            file.write(reinterpret_cast<const char*>(&key), sizeof(key))
-                .write(reinterpret_cast<const char*>(&type), sizeof(type));
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
-                .write(reinterpret_cast<const char*>(&shared_memory_size),
-                       sizeof(shared_memory_size));
-        } else {
-            file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
-        }
-    }
-
-protected:
-    static constexpr size_t INST_SIZE = sizeof(u64);
-
-    std::optional<u64> TryFindSize() {
-        constexpr size_t BLOCK_SIZE = 0x1000;
-        constexpr size_t MAXIMUM_SIZE = 0x100000;
-
-        constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
-        constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
-
-        GPUVAddr guest_addr{program_base + start_address};
-        size_t offset{0};
-        size_t size{BLOCK_SIZE};
-        while (size <= MAXIMUM_SIZE) {
-            code.resize(size / INST_SIZE);
-            u64* const data = code.data() + offset / INST_SIZE;
-            gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
-            for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
-                const u64 inst = data[index / INST_SIZE];
-                if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
-                    return offset + index;
-                }
-            }
-            guest_addr += BLOCK_SIZE;
-            size += BLOCK_SIZE;
-            offset += BLOCK_SIZE;
-        }
-        return std::nullopt;
-    }
-
-    Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
-                                            u32 raw) {
-        const TextureHandle handle{raw, via_header_index};
-        const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
-        Tegra::Texture::TICEntry entry;
-        gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
-
-        const Shader::TextureType result{[&] {
-            switch (entry.texture_type) {
-            case Tegra::Texture::TextureType::Texture1D:
-                return Shader::TextureType::Color1D;
-            case Tegra::Texture::TextureType::Texture2D:
-            case Tegra::Texture::TextureType::Texture2DNoMipmap:
-                return Shader::TextureType::Color2D;
-            case Tegra::Texture::TextureType::Texture3D:
-                return Shader::TextureType::Color3D;
-            case Tegra::Texture::TextureType::TextureCubemap:
-                return Shader::TextureType::ColorCube;
-            case Tegra::Texture::TextureType::Texture1DArray:
-                return Shader::TextureType::ColorArray1D;
-            case Tegra::Texture::TextureType::Texture2DArray:
-                return Shader::TextureType::ColorArray2D;
-            case Tegra::Texture::TextureType::Texture1DBuffer:
-                return Shader::TextureType::Buffer;
-            case Tegra::Texture::TextureType::TextureCubeArray:
-                return Shader::TextureType::ColorArrayCube;
-            default:
-                throw Shader::NotImplementedException("Unknown texture type");
-            }
-        }()};
-        texture_types.emplace(raw, result);
-        return result;
-    }
-
-    Tegra::MemoryManager* gpu_memory{};
-    GPUVAddr program_base{};
-
-    std::vector<u64> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-
-    u32 local_memory_size{};
-    u32 texture_bound{};
-    u32 shared_memory_size{};
-    std::array<u32, 3> workgroup_size{};
-
-    u32 read_lowest = std::numeric_limits<u32>::max();
-    u32 read_highest = 0;
-
-    u32 cached_lowest = std::numeric_limits<u32>::max();
-    u32 cached_highest = 0;
-
-    bool has_unbound_instructions = false;
-};
-
 namespace {
 using Shader::Backend::SPIRV::EmitSPIRV;
 using Shader::Maxwell::TranslateProgram;
+using VideoCommon::ComputeEnvironment;
+using VideoCommon::FileEnvironment;
+using VideoCommon::GenericEnvironment;
+using VideoCommon::GraphicsEnvironment;
 
-// TODO: Move this to a separate file
-constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
-constexpr u32 CACHE_VERSION{2};
-
-class GraphicsEnvironment final : public GenericEnvironment {
-public:
-    explicit GraphicsEnvironment() = default;
-    explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
-                                 Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
-                                 GPUVAddr program_base_, u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
-        gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
-        switch (program) {
-        case Maxwell::ShaderProgram::VertexA:
-            stage = Shader::Stage::VertexA;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::VertexB:
-            stage = Shader::Stage::VertexB;
-            stage_index = 0;
-            break;
-        case Maxwell::ShaderProgram::TesselationControl:
-            stage = Shader::Stage::TessellationControl;
-            stage_index = 1;
-            break;
-        case Maxwell::ShaderProgram::TesselationEval:
-            stage = Shader::Stage::TessellationEval;
-            stage_index = 2;
-            break;
-        case Maxwell::ShaderProgram::Geometry:
-            stage = Shader::Stage::Geometry;
-            stage_index = 3;
-            break;
-        case Maxwell::ShaderProgram::Fragment:
-            stage = Shader::Stage::Fragment;
-            stage_index = 4;
-            break;
-        default:
-            UNREACHABLE_MSG("Invalid program={}", program);
-            break;
-        }
-        const u64 local_size{sph.LocalMemorySize()};
-        ASSERT(local_size <= std::numeric_limits<u32>::max());
-        local_memory_size = static_cast<u32>(local_size);
-        texture_bound = maxwell3d->regs.tex_cb_index;
-    }
-
-    ~GraphicsEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
-        ASSERT(cbuf.enabled);
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{maxwell3d->regs};
-        const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
-    }
-
-private:
-    Tegra::Engines::Maxwell3D* maxwell3d{};
-    size_t stage_index{};
-};
-
-class ComputeEnvironment final : public GenericEnvironment {
-public:
-    explicit ComputeEnvironment() = default;
-    explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
-                                Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
-                                u32 start_address_)
-        : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
-                                                                              &kepler_compute_} {
-        const auto& qmd{kepler_compute->launch_description};
-        stage = Shader::Stage::Compute;
-        local_memory_size = qmd.local_pos_alloc;
-        texture_bound = kepler_compute->regs.tex_cb_index;
-        shared_memory_size = qmd.shared_alloc;
-        workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
-    }
-
-    ~ComputeEnvironment() override = default;
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto& qmd{kepler_compute->launch_description};
-        ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
-        const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
-        u32 value{};
-        if (cbuf_offset < cbuf.size) {
-            value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
-        }
-        cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
-        return value;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto& regs{kepler_compute->regs};
-        const auto& qmd{kepler_compute->launch_description};
-        return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
-    }
-
-private:
-    Tegra::Engines::KeplerCompute* kepler_compute{};
-};
-
-void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
-                       std::ofstream& file) {
-    if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
-        return;
-    }
-    const u32 num_envs{static_cast<u32>(envs.size())};
-    file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
-    for (const GenericEnvironment* const env : envs) {
-        env->Serialize(file);
-    }
-    file.write(key.data(), key.size_bytes());
-}
-
-template <typename Key, typename Envs>
-void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) {
-    try {
-        std::ofstream file;
-        file.exceptions(std::ifstream::failbit);
-        Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
-        if (!file.is_open()) {
-            LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
-            return;
-        }
-        if (file.tellp() == 0) {
-            file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
-                .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
-        }
-        const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
-        SerializePipeline(key_span, MakeSpan(envs), file);
-
-    } catch (const std::ios_base::failure& e) {
-        LOG_ERROR(Common_Filesystem, "{}", e.what());
-        if (!Common::FS::Delete(filename)) {
-            LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename);
-        }
-    }
+template <typename Container>
+auto MakeSpan(Container& container) {
+    return std::span(container.data(), container.size());
 }
 
-class FileEnvironment final : public Shader::Environment {
-public:
-    void Deserialize(std::ifstream& file) {
-        u64 code_size{};
-        u64 num_texture_types{};
-        u64 num_cbuf_values{};
-        file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
-            .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
-            .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
-            .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
-            .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
-            .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
-            .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
-            .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
-            .read(reinterpret_cast<char*>(&stage), sizeof(stage));
-        code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
-        file.read(reinterpret_cast<char*>(code.get()), code_size);
-        for (size_t i = 0; i < num_texture_types; ++i) {
-            u32 key;
-            Shader::TextureType type;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&type), sizeof(type));
-            texture_types.emplace(key, type);
-        }
-        for (size_t i = 0; i < num_cbuf_values; ++i) {
-            u64 key;
-            u32 value;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key))
-                .read(reinterpret_cast<char*>(&value), sizeof(value));
-            cbuf_values.emplace(key, value);
-        }
-        if (stage == Shader::Stage::Compute) {
-            file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
-                .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
-        } else {
-            file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
-        }
-    }
-
-    u64 ReadInstruction(u32 address) override {
-        if (address < read_lowest || address > read_highest) {
-            throw Shader::LogicError("Out of bounds address {}", address);
-        }
-        return code[(address - read_lowest) / sizeof(u64)];
-    }
-
-    u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
-        const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
-        if (it == cbuf_values.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    Shader::TextureType ReadTextureType(u32 handle) override {
-        const auto it{texture_types.find(handle)};
-        if (it == texture_types.end()) {
-            throw Shader::LogicError("Uncached read texture type");
-        }
-        return it->second;
-    }
-
-    u32 LocalMemorySize() const override {
-        return local_memory_size;
-    }
-
-    u32 SharedMemorySize() const override {
-        return shared_memory_size;
-    }
-
-    u32 TextureBoundBuffer() const override {
-        return texture_bound;
-    }
-
-    std::array<u32, 3> WorkgroupSize() const override {
-        return workgroup_size;
-    }
-
-private:
-    std::unique_ptr<u64[]> code;
-    std::unordered_map<u32, Shader::TextureType> texture_types;
-    std::unordered_map<u64, u32> cbuf_values;
-    std::array<u32, 3> workgroup_size{};
-    u32 local_memory_size{};
-    u32 shared_memory_size{};
-    u32 texture_bound{};
-    u32 read_lowest{};
-    u32 read_highest{};
-};
-
 Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
     switch (comparison) {
     case Maxwell::ComparisonOp::Never:
@@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
 }
 } // Anonymous namespace
 
-void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
-                                      const VideoCore::DiskResourceLoadCallback& callback) {
-    if (title_id == 0) {
-        return;
-    }
-    std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)};
-    std::string base_dir{shader_dir + "/vulkan"};
-    std::string transferable_dir{base_dir + "/transferable"};
-    std::string precompiled_dir{base_dir + "/precompiled"};
-    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
-        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
-        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
-        return;
-    }
-    pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);
-
-    struct {
-        std::mutex mutex;
-        size_t total{0};
-        size_t built{0};
-        bool has_loaded{false};
-    } state;
-
-    std::ifstream file;
-    Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate);
-    if (!file.is_open()) {
-        return;
-    }
-    file.exceptions(std::ifstream::failbit);
-    const auto end{file.tellg()};
-    file.seekg(0, std::ios::beg);
-
-    std::array<char, 8> magic_number;
-    u32 cache_version;
-    file.read(magic_number.data(), magic_number.size())
-        .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
-    if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
-        file.close();
-        if (Common::FS::Delete(pipeline_cache_filename)) {
-            if (magic_number != MAGIC_NUMBER) {
-                LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
-            }
-            if (cache_version != CACHE_VERSION) {
-                LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
-            }
-        } else {
-            LOG_ERROR(Render_Vulkan,
-                      "Invalid pipeline cache file and failed to delete it in \"{}\"",
-                      pipeline_cache_filename);
-        }
-        return;
-    }
-    while (file.tellg() != end) {
-        if (stop_loading) {
-            return;
-        }
-        u32 num_envs{};
-        file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
-        std::vector<FileEnvironment> envs(num_envs);
-        for (FileEnvironment& env : envs) {
-            env.Deserialize(file);
-        }
-        if (envs.front().ShaderStage() == Shader::Stage::Compute) {
-            ComputePipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
-
-                std::lock_guard lock{state.mutex};
-                compute_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        } else {
-            GraphicsPipelineCacheKey key;
-            file.read(reinterpret_cast<char*>(&key), sizeof(key));
-
-            workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
-                ShaderPools pools;
-                boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
-                for (auto& env : envs) {
-                    env_ptrs.push_back(&env);
-                }
-                auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
-
-                std::lock_guard lock{state.mutex};
-                graphics_cache.emplace(key, std::move(pipeline));
-                ++state.built;
-                if (state.has_loaded) {
-                    callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
-                }
-            });
-        }
-        ++state.total;
-    }
-    {
-        std::lock_guard lock{state.mutex};
-        callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
-        state.has_loaded = true;
-    }
-    workers.WaitForRequests();
-}
-
 size_t ComputePipelineCacheKey::Hash() const noexcept {
     const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
     return static_cast<size_t>(hash);
@@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
     return std::memcmp(&rhs, this, Size()) == 0;
 }
 
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
-                             Tegra::Engines::Maxwell3D& maxwell3d_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
                              Tegra::Engines::KeplerCompute& kepler_compute_,
                              Tegra::MemoryManager& gpu_memory_, const Device& device_,
                              VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
                              VKUpdateDescriptorQueue& update_descriptor_queue_,
                              RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                              TextureCache& texture_cache_)
-    : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
-      scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
+      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
       update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
       buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
       workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
@@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default;
 GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    if (!RefreshStages()) {
+    if (!RefreshStages(graphics_key.unique_hashes)) {
         current_pipeline = nullptr;
         return nullptr;
     }
@@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
 ComputePipeline* PipelineCache::CurrentComputePipeline() {
     MICROPROFILE_SCOPE(Vulkan_PipelineCache);
 
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-    if (!cpu_shader_addr) {
-        return nullptr;
-    }
-    const ShaderInfo* shader{TryGet(*cpu_shader_addr)};
+    const ShaderInfo* const shader{ComputeShader()};
     if (!shader) {
-        ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
-        shader = MakeShaderInfo(env, *cpu_shader_addr);
+        return nullptr;
     }
+    const auto& qmd{kepler_compute.launch_description};
     const ComputePipelineCacheKey key{
-        .unique_hash{shader->unique_hash},
-        .shared_memory_size{qmd.shared_alloc},
+        .unique_hash = shader->unique_hash,
+        .shared_memory_size = qmd.shared_alloc,
         .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
     };
     const auto [pair, is_new]{compute_cache.try_emplace(key)};
@@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
     return pipeline.get();
 }
 
-bool PipelineCache::RefreshStages() {
-    auto& dirty{maxwell3d.dirty.flags};
-    if (!dirty[VideoCommon::Dirty::Shaders]) {
-        return last_valid_shaders;
-    }
-    dirty[VideoCommon::Dirty::Shaders] = false;
-
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
-    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
-            graphics_key.unique_hashes[index] = u128{};
-            continue;
-        }
-        const auto& shader_config{maxwell3d.regs.shader_config[index]};
-        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
-        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
-        if (!cpu_shader_addr) {
-            LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-            last_valid_shaders = false;
-            return false;
-        }
-        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
-        if (!shader_info) {
-            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
-            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
-        }
-        shader_infos[index] = shader_info;
-        graphics_key.unique_hashes[index] = shader_info->unique_hash;
+void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback) {
+    if (title_id == 0) {
+        return;
+    }
+    auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
+    auto base_dir{shader_dir / "vulkan"};
+    auto transferable_dir{base_dir / "transferable"};
+    auto precompiled_dir{base_dir / "precompiled"};
+    if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
+        !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
+        LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
+        return;
     }
-    last_valid_shaders = true;
-    return true;
-}
+    pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id);
 
-const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
-    auto info = std::make_unique<ShaderInfo>();
-    if (const std::optional<u128> cached_hash{env.Analyze()}) {
-        info->unique_hash = *cached_hash;
-        info->size_bytes = env.CachedSize();
-    } else {
-        // Slow path, not really hit on commercial games
-        // Build a control flow graph to get the real shader size
-        main_pools.flow_block.ReleaseContents();
-        Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()};
-        info->unique_hash = env.CalculateHash();
-        info->size_bytes = env.ReadSize();
-    }
-    const size_t size_bytes{info->size_bytes};
-    const ShaderInfo* const result{info.get()};
-    Register(std::move(info), cpu_addr, size_bytes);
-    return result;
+    struct {
+        std::mutex mutex;
+        size_t total{0};
+        size_t built{0};
+        bool has_loaded{false};
+    } state;
+
+    const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
+        ComputePipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
+            ShaderPools pools;
+            auto pipeline{CreateComputePipeline(pools, key, env, false)};
+
+            std::lock_guard lock{state.mutex};
+            compute_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
+        GraphicsPipelineCacheKey key;
+        file.read(reinterpret_cast<char*>(&key), sizeof(key));
+
+        workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
+            ShaderPools pools;
+            boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
+            for (auto& env : envs) {
+                env_ptrs.push_back(&env);
+            }
+            auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
+
+            std::lock_guard lock{state.mutex};
+            graphics_cache.emplace(key, std::move(pipeline));
+            ++state.built;
+            if (state.has_loaded) {
+                callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
+            }
+        });
+        ++state.total;
+    }};
+    VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);
+
+    std::unique_lock lock{state.mutex};
+    callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
+    state.has_loaded = true;
+    lock.unlock();
+
+    workers.WaitForRequests();
 }
 
 std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
@@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
     size_t env_index{0};
     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         Shader::Environment& env{*envs[env_index]};
@@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
 
     u32 binding{0};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (key.unique_hashes[index] == u128{}) {
+        if (key.unique_hashes[index] == 0) {
             continue;
         }
         UNIMPLEMENTED_IF(index == 0);
@@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
         device.SaveShader(code);
         modules[stage_index] = BuildShader(device, code);
         if (device.HasDebuggingToolAttached()) {
-            const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0],
-                                               key.unique_hashes[index][1])};
+            const std::string name{fmt::format("{:016x}", key.unique_hashes[index])};
             modules[stage_index].SetObjectNameEXT(name.c_str());
         }
     }
@@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
 
     const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-        if (graphics_key.unique_hashes[index] == u128{}) {
+        if (graphics_key.unique_hashes[index] == 0) {
             continue;
         }
         const auto program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         const u32 start_address{maxwell3d.regs.shader_config[index].offset};
         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
         env.SetCachedSize(shader_infos[index]->size_bytes);
-
         envs.push_back(&env);
     }
     auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
@@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
         boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
             env_ptrs;
         for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
-            if (key.unique_hashes[index] != u128{}) {
+            if (key.unique_hashes[index] != 0) {
                 env_ptrs.push_back(&envs[index]);
             }
         }
-        SerializePipeline(key, env_ptrs, pipeline_cache_filename);
+        VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename);
     });
     return pipeline;
 }
@@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
     if (!pipeline_cache_filename.empty()) {
         serialization_thread.QueueWork([this, key, env = std::move(env)] {
-            SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
-                              pipeline_cache_filename);
+            VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
+                                           pipeline_cache_filename);
         });
     }
     return pipeline;
@@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
     device.SaveShader(code);
     vk::ShaderModule spv_module{BuildShader(device, code)};
     if (device.HasDebuggingToolAttached()) {
-        const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
+        const auto name{fmt::format("{:016x}", key.unique_hash)};
         spv_module.SetObjectNameEXT(name.c_str());
     }
     Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
@@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
     Shader::Profile profile{base_profile};
 
     const Shader::Stage stage{program.stage};
-    const bool has_geometry{key.unique_hashes[4] != u128{}};
+    const bool has_geometry{key.unique_hashes[4] != 0};
     const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
     const float point_size{Common::BitCast<float>(key.state.point_size)};
     switch (stage) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index eec17d3fd..4e48b4956 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <cstddef>
+#include <filesystem>
 #include <iosfwd>
 #include <memory>
 #include <type_traits>
@@ -42,7 +43,7 @@ namespace Vulkan {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct ComputePipelineCacheKey {
-    u128 unique_hash;
+    u64 unique_hash;
     u32 shared_memory_size;
     std::array<u32, 3> workgroup_size;
 
@@ -76,16 +77,12 @@ namespace Vulkan {
 class ComputePipeline;
 class Device;
 class DescriptorPool;
-class GenericEnvironment;
 class RasterizerVulkan;
 class RenderPassCache;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 
-struct ShaderInfo {
-    u128 unique_hash{};
-    size_t size_bytes{};
-};
+using VideoCommon::ShaderInfo;
 
 struct ShaderPools {
     void ReleaseContents() {
@@ -99,17 +96,16 @@ struct ShaderPools {
     Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
 };
 
-class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
+class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
-                           Tegra::Engines::Maxwell3D& maxwell3d,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
                            Tegra::Engines::KeplerCompute& kepler_compute,
                            Tegra::MemoryManager& gpu_memory, const Device& device,
                            VKScheduler& scheduler, DescriptorPool& descriptor_pool,
                            VKUpdateDescriptorQueue& update_descriptor_queue,
                            RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                            TextureCache& texture_cache);
-    ~PipelineCache() override;
+    ~PipelineCache();
 
     [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
 
@@ -119,10 +115,6 @@ public:
                            const VideoCore::DiskResourceLoadCallback& callback);
 
 private:
-    bool RefreshStages();
-
-    const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
-
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
 
     std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
@@ -140,11 +132,6 @@ private:
     Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
                                 const Shader::IR::Program& program);
 
-    Tegra::GPU& gpu;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-
     const Device& device;
     VKScheduler& scheduler;
     DescriptorPool& descriptor_pool;
@@ -156,16 +143,13 @@ private:
     GraphicsPipelineCacheKey graphics_key{};
     GraphicsPipeline* current_pipeline{};
 
-    std::array<const ShaderInfo*, 6> shader_infos{};
-    bool last_valid_shaders{};
-
     std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
 
     ShaderPools main_pools;
 
     Shader::Profile base_profile;
-    std::string pipeline_cache_filename;
+    std::filesystem::path pipeline_cache_filename;
 
     Common::ThreadWorker workers;
     Common::ThreadWorker serialization_thread;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7df169c85..fa6daeb3a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
149 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, 149 buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
150 update_descriptor_queue, descriptor_pool), 150 update_descriptor_queue, descriptor_pool),
151 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 151 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
152 pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, 152 pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
153 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, 153 descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
154 texture_cache), 154 texture_cache),
155 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, 155 query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache },
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
new file mode 100644
index 000000000..b8b8eace5
--- /dev/null
+++ b/src/video_core/shader_cache.cpp
@@ -0,0 +1,233 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "common/assert.h"
+#include "shader_recompiler/frontend/maxwell/control_flow.h"
+#include "shader_recompiler/object_pool.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader_cache.h"
+#include "video_core/shader_environment.h"
+
+namespace VideoCommon {
+
+void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
+    std::scoped_lock lock{invalidation_mutex};
+    InvalidatePagesInRegion(addr, size);
+    RemovePendingShaders();
+}
+
+void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+    std::lock_guard lock{invalidation_mutex};
+    InvalidatePagesInRegion(addr, size);
+}
+
+void ShaderCache::SyncGuestHost() {
+    std::scoped_lock lock{invalidation_mutex};
+    RemovePendingShaders();
+}
+
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
+                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
+                         Tegra::Engines::KeplerCompute& kepler_compute_)
+    : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
+      rasterizer{rasterizer_} {}
+
+bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
+    auto& dirty{maxwell3d.dirty.flags};
+    if (!dirty[VideoCommon::Dirty::Shaders]) {
+        return last_shaders_valid;
+    }
+    dirty[VideoCommon::Dirty::Shaders] = false;
+
+    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
+        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+            unique_hashes[index] = 0;
+            continue;
+        }
+        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
+        const GPUVAddr shader_addr{base_addr + shader_config.offset};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        if (!cpu_shader_addr) {
+            LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+            last_shaders_valid = false;
+            return false;
+        }
+        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
+        if (!shader_info) {
+            const u32 start_address{shader_config.offset};
+            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
+        }
+        shader_infos[index] = shader_info;
+        unique_hashes[index] = shader_info->unique_hash;
+    }
+    last_shaders_valid = true;
+    return true;
+}
+
+const ShaderInfo* ShaderCache::ComputeShader() {
+    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
+    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr shader_addr{program_base + qmd.program_start};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    if (!cpu_shader_addr) {
+        LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
+        return nullptr;
+    }
+    if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
+        return shader;
+    }
+    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    return MakeShaderInfo(env, *cpu_shader_addr);
+}
+
+ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
+    std::scoped_lock lock{lookup_mutex};
+
+    const auto it = lookup_cache.find(addr);
+    if (it == lookup_cache.end()) {
+        return nullptr;
+    }
+    return it->second->data;
+}
+
+void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
+    std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+    const VAddr addr_end = addr + size;
+    Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+    const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+    for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+        invalidation_cache[page].push_back(entry);
+    }
+
+    storage.push_back(std::move(data));
+
+    rasterizer.UpdatePagesCachedCount(addr, size, 1);
+}
+
+void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
+    const VAddr addr_end = addr + size;
+    const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+    for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
+        auto it = invalidation_cache.find(page);
+        if (it == invalidation_cache.end()) {
+            continue;
+        }
+        InvalidatePageEntries(it->second, addr, addr_end);
+    }
+}
+
+void ShaderCache::RemovePendingShaders() {
+    if (marked_for_removal.empty()) {
+        return;
+    }
+    // Remove duplicates
+    std::ranges::sort(marked_for_removal);
+    marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
+                             marked_for_removal.end());
+
+    std::vector<ShaderInfo*> removed_shaders;
+    removed_shaders.reserve(marked_for_removal.size());
+
+    std::scoped_lock lock{lookup_mutex};
+
+    for (Entry* const entry : marked_for_removal) {
+        removed_shaders.push_back(entry->data);
+
+        const auto it = lookup_cache.find(entry->addr_start);
+        ASSERT(it != lookup_cache.end());
+        lookup_cache.erase(it);
+    }
+    marked_for_removal.clear();
+
+    if (!removed_shaders.empty()) {
+        RemoveShadersFromStorage(std::move(removed_shaders));
+    }
+}
+
+void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+    size_t index = 0;
+    while (index < entries.size()) {
+        Entry* const entry = entries[index];
+        if (!entry->Overlaps(addr, addr_end)) {
+            ++index;
+            continue;
+        }
+
+        UnmarkMemory(entry);
+        RemoveEntryFromInvalidationCache(entry);
+        marked_for_removal.push_back(entry);
+    }
+}
+
+void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
+    const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
+    for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
+        const auto entries_it = invalidation_cache.find(page);
+        ASSERT(entries_it != invalidation_cache.end());
+        std::vector<Entry*>& entries = entries_it->second;
+
+        const auto entry_it = std::ranges::find(entries, entry);
+        ASSERT(entry_it != entries.end());
+        entries.erase(entry_it);
+    }
+}
+
+void ShaderCache::UnmarkMemory(Entry* entry) {
+    if (!entry->is_memory_marked) {
+        return;
+    }
+    entry->is_memory_marked = false;
+
+    const VAddr addr = entry->addr_start;
+    const size_t size = entry->addr_end - addr;
+    rasterizer.UpdatePagesCachedCount(addr, size, -1);
+}
+
+void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
+    // Remove them from the cache
+    std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
+        return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
+    });
+}
+
+ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
+    auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+    Entry* const entry_pointer = entry.get();
+
+    lookup_cache.emplace(addr, std::move(entry));
+    return entry_pointer;
+}
+
+const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
+    auto info = std::make_unique<ShaderInfo>();
+    if (const std::optional<u64> cached_hash{env.Analyze()}) {
+        info->unique_hash = *cached_hash;
218 info->size_bytes = env.CachedSize();
219 } else {
220        // Slow path, rarely hit in commercial games
221 // Build a control flow graph to get the real shader size
222 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
223 Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
224 info->unique_hash = env.CalculateHash();
225 info->size_bytes = env.ReadSize();
226 }
227 const size_t size_bytes{info->size_bytes};
228 const ShaderInfo* const result{info.get()};
229 Register(std::move(info), cpu_addr, size_bytes);
230 return result;
231}
232
233} // namespace VideoCommon
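
The cache above buckets entries by 16 KiB pages (PAGE_BITS = 14): Register adds an entry to every page its address range touches, and InvalidatePagesInRegion only scans the buckets for the written pages. A standalone sketch of that page-range arithmetic, assuming the constants above (PagesOf and PageRange are hypothetical helpers, not part of the cache):

#include <cstddef>
#include <cstdint>

constexpr std::uint64_t PAGE_BITS = 14;
constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

struct PageRange {
    std::uint64_t first; // first page touched by [addr, addr + size)
    std::uint64_t last;  // one past the last page touched
};

constexpr PageRange PagesOf(std::uint64_t addr, std::size_t size) {
    const std::uint64_t addr_end = addr + size;
    return PageRange{
        .first = addr >> PAGE_BITS,
        // Round the end up so a range finishing mid-page still touches that
        // page; this matches (addr_end + PAGE_SIZE - 1) >> PAGE_BITS above.
        .last = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS,
    };
}

static_assert(PagesOf(0, 1).first == 0 && PagesOf(0, 1).last == 1);
static_assert(PagesOf(PAGE_SIZE - 1, 2).last == 2); // straddles a page boundary
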
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 015a789d6..89a4bcc84 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,20 +4,28 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
8#include <memory> 7#include <memory>
9#include <mutex> 8#include <mutex>
10#include <unordered_map> 9#include <unordered_map>
11#include <utility> 10#include <utility>
12#include <vector> 11#include <vector>
13 12
14#include "common/assert.h"
15#include "common/common_types.h" 13#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
17 15
16namespace Tegra {
17class MemoryManager;
18}
19
18namespace VideoCommon { 20namespace VideoCommon {
19 21
20template <class T> 22class GenericEnvironment;
23
24struct ShaderInfo {
25 u64 unique_hash{};
26 size_t size_bytes{};
27};
28
21class ShaderCache { 29class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14; 30 static constexpr u64 PAGE_BITS = 14;
23 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; 31 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
@@ -25,206 +33,100 @@ class ShaderCache {
25 struct Entry { 33 struct Entry {
26 VAddr addr_start; 34 VAddr addr_start;
27 VAddr addr_end; 35 VAddr addr_end;
28 T* data; 36 ShaderInfo* data;
29 37
30 bool is_memory_marked = true; 38 bool is_memory_marked = true;
31 39
32 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { 40 bool Overlaps(VAddr start, VAddr end) const noexcept {
33 return start < addr_end && addr_start < end; 41 return start < addr_end && addr_start < end;
34 } 42 }
35 }; 43 };
36 44
37public: 45public:
38 virtual ~ShaderCache() = default;
39
40 /// @brief Removes shaders inside a given region 46 /// @brief Removes shaders inside a given region
41 /// @note Checks for ranges 47 /// @note Checks for ranges
42 /// @param addr Start address of the invalidation 48 /// @param addr Start address of the invalidation
43 /// @param size Number of bytes of the invalidation 49 /// @param size Number of bytes of the invalidation
44 void InvalidateRegion(VAddr addr, std::size_t size) { 50 void InvalidateRegion(VAddr addr, size_t size);
45 std::scoped_lock lock{invalidation_mutex};
46 InvalidatePagesInRegion(addr, size);
47 RemovePendingShaders();
48 }
49 51
50 /// @brief Unmarks a memory region as cached and marks it for removal 52 /// @brief Unmarks a memory region as cached and marks it for removal
51 /// @param addr Start address of the CPU write operation 53 /// @param addr Start address of the CPU write operation
52 /// @param size Number of bytes of the CPU write operation 54 /// @param size Number of bytes of the CPU write operation
53 void OnCPUWrite(VAddr addr, std::size_t size) { 55 void OnCPUWrite(VAddr addr, size_t size);
54 std::lock_guard lock{invalidation_mutex};
55 InvalidatePagesInRegion(addr, size);
56 }
57 56
58 /// @brief Flushes delayed removal operations 57 /// @brief Flushes delayed removal operations
59 void SyncGuestHost() { 58 void SyncGuestHost();
60 std::scoped_lock lock{invalidation_mutex}; 59
61 RemovePendingShaders(); 60protected:
62 } 61 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
62 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
63 Tegra::Engines::KeplerCompute& kepler_compute_);
64
65 /// @brief Update the hashes and information of shader stages
 66    /// @param unique_hashes Output array for the hash of each stage; 0 when a stage is disabled
 67    /// @return True on success, false on error
68 bool RefreshStages(std::array<u64, 6>& unique_hashes);
69
70 /// @brief Returns information about the current compute shader
71 /// @return Pointer to a valid shader, nullptr on error
72 const ShaderInfo* ComputeShader();
73
74 Tegra::MemoryManager& gpu_memory;
75 Tegra::Engines::Maxwell3D& maxwell3d;
76 Tegra::Engines::KeplerCompute& kepler_compute;
63 77
78 std::array<const ShaderInfo*, 6> shader_infos{};
79 bool last_shaders_valid = false;
80
81private:
64 /// @brief Tries to obtain a cached shader starting in a given address 82 /// @brief Tries to obtain a cached shader starting in a given address
65 /// @note Doesn't check for ranges, the given address has to be the start of the shader 83 /// @note Doesn't check for ranges, the given address has to be the start of the shader
 66    /// @param addr Start address of the shader, the lookup is exact rather than by region 84    /// @param addr Start address of the shader, the lookup is exact rather than by region
67 /// @return Pointer to a valid shader, nullptr when nothing is found 85 /// @return Pointer to a valid shader, nullptr when nothing is found
68 T* TryGet(VAddr addr) const { 86 ShaderInfo* TryGet(VAddr addr) const;
69 std::scoped_lock lock{lookup_mutex};
70
71 const auto it = lookup_cache.find(addr);
72 if (it == lookup_cache.end()) {
73 return nullptr;
74 }
75 return it->second->data;
76 }
77
78protected:
79 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
80 87
81 /// @brief Register in the cache a given entry 88 /// @brief Register in the cache a given entry
82 /// @param data Shader to store in the cache 89 /// @param data Shader to store in the cache
83 /// @param addr Start address of the shader that will be registered 90 /// @param addr Start address of the shader that will be registered
84 /// @param size Size in bytes of the shader 91 /// @param size Size in bytes of the shader
85 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { 92 void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
86 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
87
88 const VAddr addr_end = addr + size;
89 Entry* const entry = NewEntry(addr, addr_end, data.get());
90
91 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
92 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
93 invalidation_cache[page].push_back(entry);
94 }
95
96 storage.push_back(std::move(data));
97 93
98 rasterizer.UpdatePagesCachedCount(addr, size, 1);
99 }
100
101 /// @brief Called when a shader is going to be removed
102 /// @param shader Shader that will be removed
 103    /// @pre invalidation_mutex is locked
104 /// @pre lookup_mutex is locked
105 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
106
107private:
108 /// @brief Invalidate pages in a given region 94 /// @brief Invalidate pages in a given region
109 /// @pre invalidation_mutex is locked 95 /// @pre invalidation_mutex is locked
110 void InvalidatePagesInRegion(VAddr addr, std::size_t size) { 96 void InvalidatePagesInRegion(VAddr addr, size_t size);
111 const VAddr addr_end = addr + size;
112 const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
113 for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
114 auto it = invalidation_cache.find(page);
115 if (it == invalidation_cache.end()) {
116 continue;
117 }
118 InvalidatePageEntries(it->second, addr, addr_end);
119 }
120 }
121 97
122 /// @brief Remove shaders marked for deletion 98 /// @brief Remove shaders marked for deletion
123 /// @pre invalidation_mutex is locked 99 /// @pre invalidation_mutex is locked
124 void RemovePendingShaders() { 100 void RemovePendingShaders();
125 if (marked_for_removal.empty()) {
126 return;
127 }
128 // Remove duplicates
129 std::sort(marked_for_removal.begin(), marked_for_removal.end());
130 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
131 marked_for_removal.end());
132
133 std::vector<T*> removed_shaders;
134 removed_shaders.reserve(marked_for_removal.size());
135
136 std::scoped_lock lock{lookup_mutex};
137
138 for (Entry* const entry : marked_for_removal) {
139 removed_shaders.push_back(entry->data);
140
141 const auto it = lookup_cache.find(entry->addr_start);
142 ASSERT(it != lookup_cache.end());
143 lookup_cache.erase(it);
144 }
145 marked_for_removal.clear();
146
147 if (!removed_shaders.empty()) {
148 RemoveShadersFromStorage(std::move(removed_shaders));
149 }
150 }
151 101
152 /// @brief Invalidates entries in a given range for the passed page 102 /// @brief Invalidates entries in a given range for the passed page
153 /// @param entries Vector of entries in the page, it will be modified on overlaps 103 /// @param entries Vector of entries in the page, it will be modified on overlaps
154 /// @param addr Start address of the invalidation 104 /// @param addr Start address of the invalidation
155 /// @param addr_end Non-inclusive end address of the invalidation 105 /// @param addr_end Non-inclusive end address of the invalidation
156 /// @pre invalidation_mutex is locked 106 /// @pre invalidation_mutex is locked
157 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { 107 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
158 std::size_t index = 0;
159 while (index < entries.size()) {
160 Entry* const entry = entries[index];
161 if (!entry->Overlaps(addr, addr_end)) {
162 ++index;
163 continue;
164 }
165
166 UnmarkMemory(entry);
167 RemoveEntryFromInvalidationCache(entry);
168 marked_for_removal.push_back(entry);
169 }
170 }
171 108
172 /// @brief Removes all references to an entry in the invalidation cache 109 /// @brief Removes all references to an entry in the invalidation cache
173 /// @param entry Entry to remove from the invalidation cache 110 /// @param entry Entry to remove from the invalidation cache
174 /// @pre invalidation_mutex is locked 111 /// @pre invalidation_mutex is locked
175 void RemoveEntryFromInvalidationCache(const Entry* entry) { 112 void RemoveEntryFromInvalidationCache(const Entry* entry);
176 const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
177 for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
178 const auto entries_it = invalidation_cache.find(page);
179 ASSERT(entries_it != invalidation_cache.end());
180 std::vector<Entry*>& entries = entries_it->second;
181
182 const auto entry_it = std::find(entries.begin(), entries.end(), entry);
183 ASSERT(entry_it != entries.end());
184 entries.erase(entry_it);
185 }
186 }
187 113
188 /// @brief Unmarks an entry from the rasterizer cache 114 /// @brief Unmarks an entry from the rasterizer cache
189 /// @param entry Entry to unmark from memory 115 /// @param entry Entry to unmark from memory
190 void UnmarkMemory(Entry* entry) { 116 void UnmarkMemory(Entry* entry);
191 if (!entry->is_memory_marked) {
192 return;
193 }
194 entry->is_memory_marked = false;
195
196 const VAddr addr = entry->addr_start;
197 const std::size_t size = entry->addr_end - addr;
198 rasterizer.UpdatePagesCachedCount(addr, size, -1);
199 }
200 117
201 /// @brief Removes a vector of shaders from a list 118 /// @brief Removes a vector of shaders from a list
202 /// @param removed_shaders Shaders to be removed from the storage 119 /// @param removed_shaders Shaders to be removed from the storage
203 /// @pre invalidation_mutex is locked 120 /// @pre invalidation_mutex is locked
204 /// @pre lookup_mutex is locked 121 /// @pre lookup_mutex is locked
205 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { 122 void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
206 // Notify removals
207 for (T* const shader : removed_shaders) {
208 OnShaderRemoval(shader);
209 }
210
211 // Remove them from the cache
212 const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
213 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
214 removed_shaders.end();
215 };
216 std::erase_if(storage, is_removed);
217 }
218 123
219 /// @brief Creates a new entry in the lookup cache and returns its pointer 124 /// @brief Creates a new entry in the lookup cache and returns its pointer
220 /// @pre lookup_mutex is locked 125 /// @pre lookup_mutex is locked
221 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { 126 Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
222 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
223 Entry* const entry_pointer = entry.get();
224 127
225 lookup_cache.emplace(addr, std::move(entry)); 128 /// @brief Create a new shader entry and register it
226 return entry_pointer; 129 const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
227 }
228 130
229 VideoCore::RasterizerInterface& rasterizer; 131 VideoCore::RasterizerInterface& rasterizer;
230 132
@@ -233,7 +135,7 @@ private:
233 135
234 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; 136 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
235 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; 137 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
236 std::vector<std::unique_ptr<T>> storage; 138 std::vector<std::unique_ptr<ShaderInfo>> storage;
237 std::vector<Entry*> marked_for_removal; 139 std::vector<Entry*> marked_for_removal;
238}; 140};
239 141
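
With the template parameter and OnShaderRemoval gone, backends now derive from ShaderCache only to reach RefreshStages and ComputeShader. A hedged sketch of the expected call pattern; BackendShaderCache, CurrentGraphicsPipeline and LookupPipeline are illustrative stand-ins, not the real OpenGL/Vulkan classes:

#include <array>
#include <span>

#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/shader_cache.h"

class BackendShaderCache : public VideoCommon::ShaderCache {
public:
    explicit BackendShaderCache(VideoCore::RasterizerInterface& rasterizer_,
                                Tegra::MemoryManager& gpu_memory_,
                                Tegra::Engines::Maxwell3D& maxwell3d_,
                                Tegra::Engines::KeplerCompute& kepler_compute_)
        : ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_} {}

    const void* CurrentGraphicsPipeline() {
        std::array<u64, 6> unique_hashes{};
        if (!RefreshStages(unique_hashes)) {
            return nullptr; // a stage had an unmapped GPU address
        }
        // unique_hashes (plus fixed state) keys the backend pipeline map
        return LookupPipeline(unique_hashes);
    }

    const void* CurrentComputePipeline() {
        const VideoCommon::ShaderInfo* const shader{ComputeShader()};
        if (shader == nullptr) {
            return nullptr;
        }
        const std::array<u64, 1> key{shader->unique_hash};
        return LookupPipeline(key);
    }

private:
    // Stand-in for a real hash -> pipeline lookup
    const void* LookupPipeline(std::span<const u64>) {
        return nullptr;
    }
};
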
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..5dccc0097
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,453 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <filesystem>
6#include <fstream>
7#include <memory>
8#include <optional>
9#include <utility>
10
11#include "common/assert.h"
12#include "common/cityhash.h"
13#include "common/common_types.h"
14#include "common/div_ceil.h"
15#include "common/fs/fs.h"
16#include "common/logging/log.h"
17#include "shader_recompiler/environment.h"
18#include "video_core/memory_manager.h"
19#include "video_core/shader_environment.h"
20#include "video_core/textures/texture.h"
21
22namespace VideoCommon {
23
24constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
25constexpr u32 CACHE_VERSION = 3;
26
27constexpr size_t INST_SIZE = sizeof(u64);
28
29using Maxwell = Tegra::Engines::Maxwell3D::Regs;
30
31static u64 MakeCbufKey(u32 index, u32 offset) {
32 return (static_cast<u64>(index) << 32) | offset;
33}
34
35static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
36 switch (entry.texture_type) {
37 case Tegra::Texture::TextureType::Texture1D:
38 return Shader::TextureType::Color1D;
39 case Tegra::Texture::TextureType::Texture2D:
40 case Tegra::Texture::TextureType::Texture2DNoMipmap:
41 return Shader::TextureType::Color2D;
42 case Tegra::Texture::TextureType::Texture3D:
43 return Shader::TextureType::Color3D;
44 case Tegra::Texture::TextureType::TextureCubemap:
45 return Shader::TextureType::ColorCube;
46 case Tegra::Texture::TextureType::Texture1DArray:
47 return Shader::TextureType::ColorArray1D;
48 case Tegra::Texture::TextureType::Texture2DArray:
49 return Shader::TextureType::ColorArray2D;
50 case Tegra::Texture::TextureType::Texture1DBuffer:
51 return Shader::TextureType::Buffer;
52 case Tegra::Texture::TextureType::TextureCubeArray:
53 return Shader::TextureType::ColorArrayCube;
54 default:
55 throw Shader::NotImplementedException("Unknown texture type");
56 }
57}
58
59GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
60 u32 start_address_)
61 : gpu_memory{&gpu_memory_}, program_base{program_base_} {
62 start_address = start_address_;
63}
64
65GenericEnvironment::~GenericEnvironment() = default;
66
67u32 GenericEnvironment::TextureBoundBuffer() const {
68 return texture_bound;
69}
70
71u32 GenericEnvironment::LocalMemorySize() const {
72 return local_memory_size;
73}
74
75u32 GenericEnvironment::SharedMemorySize() const {
76 return shared_memory_size;
77}
78
79std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
80 return workgroup_size;
81}
82
83u64 GenericEnvironment::ReadInstruction(u32 address) {
84 read_lowest = std::min(read_lowest, address);
85 read_highest = std::max(read_highest, address);
86
87 if (address >= cached_lowest && address < cached_highest) {
88 return code[(address - cached_lowest) / INST_SIZE];
89 }
90 has_unbound_instructions = true;
91 return gpu_memory->Read<u64>(program_base + address);
92}
93
94std::optional<u64> GenericEnvironment::Analyze() {
95 const std::optional<u64> size{TryFindSize()};
96 if (!size) {
97 return std::nullopt;
98 }
99 cached_lowest = start_address;
100 cached_highest = start_address + static_cast<u32>(*size);
101 return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
102}
103
104void GenericEnvironment::SetCachedSize(size_t size_bytes) {
105 cached_lowest = start_address;
106 cached_highest = start_address + static_cast<u32>(size_bytes);
107 code.resize(CachedSize());
108 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
109}
110
111size_t GenericEnvironment::CachedSize() const noexcept {
112 return cached_highest - cached_lowest + INST_SIZE;
113}
114
115size_t GenericEnvironment::ReadSize() const noexcept {
116 return read_highest - read_lowest + INST_SIZE;
117}
118
119bool GenericEnvironment::CanBeSerialized() const noexcept {
120 return !has_unbound_instructions;
121}
122
123u64 GenericEnvironment::CalculateHash() const {
124 const size_t size{ReadSize()};
125 const auto data{std::make_unique<char[]>(size)};
126 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
127 return Common::CityHash64(data.get(), size);
128}
129
130void GenericEnvironment::Serialize(std::ofstream& file) const {
131 const u64 code_size{static_cast<u64>(CachedSize())};
132 const u64 num_texture_types{static_cast<u64>(texture_types.size())};
133 const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
134
135 file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
136 .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
137 .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
138 .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
139 .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
140 .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
141 .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
142 .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
143 .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
144 .write(reinterpret_cast<const char*>(code.data()), code_size);
145 for (const auto [key, type] : texture_types) {
146 file.write(reinterpret_cast<const char*>(&key), sizeof(key))
147 .write(reinterpret_cast<const char*>(&type), sizeof(type));
148 }
 149    for (const auto [key, value] : cbuf_values) {
 150        file.write(reinterpret_cast<const char*>(&key), sizeof(key))
 151            .write(reinterpret_cast<const char*>(&value), sizeof(value));
152 }
153 if (stage == Shader::Stage::Compute) {
154 file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
155 .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
156 } else {
157 file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
158 }
159}
160
161std::optional<u64> GenericEnvironment::TryFindSize() {
162 static constexpr size_t BLOCK_SIZE = 0x1000;
163 static constexpr size_t MAXIMUM_SIZE = 0x100000;
164
165 static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
166 static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
167
168 GPUVAddr guest_addr{program_base + start_address};
169 size_t offset{0};
170 size_t size{BLOCK_SIZE};
171 while (size <= MAXIMUM_SIZE) {
172 code.resize(size / INST_SIZE);
173 u64* const data = code.data() + offset / INST_SIZE;
174 gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
175 for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
176 const u64 inst = data[index / INST_SIZE];
177 if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
178 return offset + index;
179 }
180 }
181 guest_addr += BLOCK_SIZE;
182 size += BLOCK_SIZE;
183 offset += BLOCK_SIZE;
184 }
185 return std::nullopt;
186}
187
188Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
189 bool via_header_index, u32 raw) {
190 const TextureHandle handle{raw, via_header_index};
191 const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
192 Tegra::Texture::TICEntry entry;
193 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
194 const Shader::TextureType result{ConvertType(entry)};
195 texture_types.emplace(raw, result);
196 return result;
197}
198
199GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
200 Tegra::MemoryManager& gpu_memory_,
201 Maxwell::ShaderProgram program, GPUVAddr program_base_,
202 u32 start_address_)
203 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
204 gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
205 switch (program) {
206 case Maxwell::ShaderProgram::VertexA:
207 stage = Shader::Stage::VertexA;
208 stage_index = 0;
209 break;
210 case Maxwell::ShaderProgram::VertexB:
211 stage = Shader::Stage::VertexB;
212 stage_index = 0;
213 break;
214 case Maxwell::ShaderProgram::TesselationControl:
215 stage = Shader::Stage::TessellationControl;
216 stage_index = 1;
217 break;
218 case Maxwell::ShaderProgram::TesselationEval:
219 stage = Shader::Stage::TessellationEval;
220 stage_index = 2;
221 break;
222 case Maxwell::ShaderProgram::Geometry:
223 stage = Shader::Stage::Geometry;
224 stage_index = 3;
225 break;
226 case Maxwell::ShaderProgram::Fragment:
227 stage = Shader::Stage::Fragment;
228 stage_index = 4;
229 break;
230 default:
231 UNREACHABLE_MSG("Invalid program={}", program);
232 break;
233 }
234 const u64 local_size{sph.LocalMemorySize()};
235 ASSERT(local_size <= std::numeric_limits<u32>::max());
236 local_memory_size = static_cast<u32>(local_size);
237 texture_bound = maxwell3d->regs.tex_cb_index;
238}
239
240u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
241 const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
242 ASSERT(cbuf.enabled);
243 u32 value{};
244 if (cbuf_offset < cbuf.size) {
245 value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
246 }
247 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
248 return value;
249}
250
251Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
252 const auto& regs{maxwell3d->regs};
253 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
254 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
255}
256
257ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
258 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
259 u32 start_address_)
260 : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
261 &kepler_compute_} {
262 const auto& qmd{kepler_compute->launch_description};
263 stage = Shader::Stage::Compute;
264 local_memory_size = qmd.local_pos_alloc;
265 texture_bound = kepler_compute->regs.tex_cb_index;
266 shared_memory_size = qmd.shared_alloc;
267 workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
268}
269
270u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
271 const auto& qmd{kepler_compute->launch_description};
272 ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
273 const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
274 u32 value{};
275 if (cbuf_offset < cbuf.size) {
276 value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
277 }
278 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
279 return value;
280}
281
282Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
283 const auto& regs{kepler_compute->regs};
284 const auto& qmd{kepler_compute->launch_description};
285 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
286}
287
288void FileEnvironment::Deserialize(std::ifstream& file) {
289 u64 code_size{};
290 u64 num_texture_types{};
291 u64 num_cbuf_values{};
292 file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
293 .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
294 .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
295 .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
296 .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
297 .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
298 .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
299 .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
300 .read(reinterpret_cast<char*>(&stage), sizeof(stage));
301 code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
302 file.read(reinterpret_cast<char*>(code.get()), code_size);
303 for (size_t i = 0; i < num_texture_types; ++i) {
304 u32 key;
305 Shader::TextureType type;
306 file.read(reinterpret_cast<char*>(&key), sizeof(key))
307 .read(reinterpret_cast<char*>(&type), sizeof(type));
308 texture_types.emplace(key, type);
309 }
310 for (size_t i = 0; i < num_cbuf_values; ++i) {
311 u64 key;
312 u32 value;
313 file.read(reinterpret_cast<char*>(&key), sizeof(key))
314 .read(reinterpret_cast<char*>(&value), sizeof(value));
315 cbuf_values.emplace(key, value);
316 }
317 if (stage == Shader::Stage::Compute) {
318 file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
319 .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
320 } else {
321 file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
322 }
323}
324
325u64 FileEnvironment::ReadInstruction(u32 address) {
326 if (address < read_lowest || address > read_highest) {
327 throw Shader::LogicError("Out of bounds address {}", address);
328 }
329 return code[(address - read_lowest) / sizeof(u64)];
330}
331
332u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
333 const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
334 if (it == cbuf_values.end()) {
 335        throw Shader::LogicError("Uncached constant buffer read");
336 }
337 return it->second;
338}
339
340Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
341 const auto it{texture_types.find(handle)};
342 if (it == texture_types.end()) {
343 throw Shader::LogicError("Uncached read texture type");
344 }
345 return it->second;
346}
347
348u32 FileEnvironment::LocalMemorySize() const {
349 return local_memory_size;
350}
351
352u32 FileEnvironment::SharedMemorySize() const {
353 return shared_memory_size;
354}
355
356u32 FileEnvironment::TextureBoundBuffer() const {
357 return texture_bound;
358}
359
360std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
361 return workgroup_size;
362}
363
364void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
365 const std::filesystem::path& filename) try {
366 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
367 file.exceptions(std::ifstream::failbit);
368 if (!file.is_open()) {
369 LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
370 Common::FS::PathToUTF8String(filename));
371 return;
372 }
373 if (file.tellp() == 0) {
374 // Write header
375 file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
376 .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
377 }
378 if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
379 return;
380 }
381 const u32 num_envs{static_cast<u32>(envs.size())};
382 file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
383 for (const GenericEnvironment* const env : envs) {
384 env->Serialize(file);
385 }
386 file.write(key.data(), key.size_bytes());
387
388} catch (const std::ios_base::failure& e) {
389 LOG_ERROR(Common_Filesystem, "{}", e.what());
390 if (!Common::FS::RemoveFile(filename)) {
391 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
392 Common::FS::PathToUTF8String(filename));
393 }
394}
395
396void LoadPipelines(
397 std::stop_token stop_loading, const std::filesystem::path& filename,
398 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
399 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
400 std::ifstream file(filename, std::ios::binary | std::ios::ate);
401 if (!file.is_open()) {
402 return;
403 }
404 file.exceptions(std::ifstream::failbit);
405 const auto end{file.tellg()};
406 file.seekg(0, std::ios::beg);
407
408 std::array<char, 8> magic_number;
409 u32 cache_version;
410 file.read(magic_number.data(), magic_number.size())
411 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
412 if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
413 file.close();
414 if (Common::FS::RemoveFile(filename)) {
415 if (magic_number != MAGIC_NUMBER) {
416 LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
417 }
418 if (cache_version != CACHE_VERSION) {
419 LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
420 }
421 } else {
422 LOG_ERROR(Common_Filesystem,
423 "Invalid pipeline cache file and failed to delete it in \"{}\"",
424 Common::FS::PathToUTF8String(filename));
425 }
426 return;
427 }
428 while (file.tellg() != end) {
429 if (stop_loading.stop_requested()) {
430 return;
431 }
432 u32 num_envs{};
433 file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
434 std::vector<FileEnvironment> envs(num_envs);
435 for (FileEnvironment& env : envs) {
436 env.Deserialize(file);
437 }
438 if (envs.front().ShaderStage() == Shader::Stage::Compute) {
439 load_compute(file, std::move(envs.front()));
440 } else {
441 load_graphics(file, std::move(envs));
442 }
443 }
444
445} catch (const std::ios_base::failure& e) {
446 LOG_ERROR(Common_Filesystem, "{}", e.what());
447 if (!Common::FS::RemoveFile(filename)) {
448 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
449 Common::FS::PathToUTF8String(filename));
450 }
451}
452
453} // namespace VideoCommon
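
The resulting file is a flat stream: an 8-byte magic ('yuzucach'), a u32 version, then repeated records of a u32 environment count, the serialized environments, and the raw pipeline key. A minimal sketch of the header validation LoadPipelines performs, assuming the constants above (HasValidCacheHeader is a hypothetical helper):

#include <array>
#include <cstdint>
#include <fstream>

bool HasValidCacheHeader(std::ifstream& file) {
    static constexpr std::array<char, 8> MAGIC{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
    static constexpr std::uint32_t VERSION = 3;

    std::array<char, 8> magic{};
    std::uint32_t version{};
    file.read(magic.data(), magic.size())
        .read(reinterpret_cast<char*>(&version), sizeof(version));
    // A stale version is expected after format changes; the real loader
    // deletes the file and lets the cache rebuild from scratch.
    return file && magic == MAGIC && version == VERSION;
}
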
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
new file mode 100644
index 000000000..37d712045
--- /dev/null
+++ b/src/video_core/shader_environment.h
@@ -0,0 +1,198 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <atomic>
9#include <filesystem>
10#include <iosfwd>
11#include <limits>
12#include <memory>
13#include <optional>
14#include <span>
15#include <type_traits>
16#include <unordered_map>
17#include <vector>
18
19#include "common/common_types.h"
20#include "common/unique_function.h"
21#include "shader_recompiler/environment.h"
22#include "video_core/engines/kepler_compute.h"
23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/textures/texture.h"
25
26namespace Tegra {
27class MemoryManager;
28}
29
30namespace VideoCommon {
31
32struct TextureHandle {
33 explicit TextureHandle(u32 data, bool via_header_index) {
34 if (via_header_index) {
35 image = data;
36 sampler = data;
37 } else {
38 const Tegra::Texture::TextureHandle handle{data};
39 image = handle.tic_id;
 40            sampler = handle.tsc_id.Value(); // via_header_index is false in this branch
41 }
42 }
43
44 u32 image;
45 u32 sampler;
46};
47
48class GenericEnvironment : public Shader::Environment {
49public:
50 explicit GenericEnvironment() = default;
51 explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
52 u32 start_address_);
53
54 ~GenericEnvironment() override;
55
56 [[nodiscard]] u32 TextureBoundBuffer() const final;
57
58 [[nodiscard]] u32 LocalMemorySize() const final;
59
60 [[nodiscard]] u32 SharedMemorySize() const final;
61
62 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
63
64 [[nodiscard]] u64 ReadInstruction(u32 address) final;
65
66 [[nodiscard]] std::optional<u64> Analyze();
67
68 void SetCachedSize(size_t size_bytes);
69
70 [[nodiscard]] size_t CachedSize() const noexcept;
71
72 [[nodiscard]] size_t ReadSize() const noexcept;
73
74 [[nodiscard]] bool CanBeSerialized() const noexcept;
75
76 [[nodiscard]] u64 CalculateHash() const;
77
78 void Serialize(std::ofstream& file) const;
79
80protected:
81 std::optional<u64> TryFindSize();
82
83 Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
84 u32 raw);
85
86 Tegra::MemoryManager* gpu_memory{};
87 GPUVAddr program_base{};
88
89 std::vector<u64> code;
90 std::unordered_map<u32, Shader::TextureType> texture_types;
91 std::unordered_map<u64, u32> cbuf_values;
92
93 u32 local_memory_size{};
94 u32 texture_bound{};
95 u32 shared_memory_size{};
96 std::array<u32, 3> workgroup_size{};
97
98 u32 read_lowest = std::numeric_limits<u32>::max();
99 u32 read_highest = 0;
100
101 u32 cached_lowest = std::numeric_limits<u32>::max();
102 u32 cached_highest = 0;
103
104 bool has_unbound_instructions = false;
105};
106
107class GraphicsEnvironment final : public GenericEnvironment {
108public:
109 explicit GraphicsEnvironment() = default;
110 explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
111 Tegra::MemoryManager& gpu_memory_,
112 Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
113 GPUVAddr program_base_, u32 start_address_);
114
115 ~GraphicsEnvironment() override = default;
116
117 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
118
119 Shader::TextureType ReadTextureType(u32 handle) override;
120
121private:
122 Tegra::Engines::Maxwell3D* maxwell3d{};
123 size_t stage_index{};
124};
125
126class ComputeEnvironment final : public GenericEnvironment {
127public:
128 explicit ComputeEnvironment() = default;
129 explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
130 Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
131 u32 start_address_);
132
133 ~ComputeEnvironment() override = default;
134
135 u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
136
137 Shader::TextureType ReadTextureType(u32 handle) override;
138
139private:
140 Tegra::Engines::KeplerCompute* kepler_compute{};
141};
142
143class FileEnvironment final : public Shader::Environment {
144public:
145 FileEnvironment() = default;
146 ~FileEnvironment() override = default;
147
148 FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
149 FileEnvironment(FileEnvironment&&) noexcept = default;
150
151 FileEnvironment& operator=(const FileEnvironment&) = delete;
152 FileEnvironment(const FileEnvironment&) = delete;
153
154 void Deserialize(std::ifstream& file);
155
156 [[nodiscard]] u64 ReadInstruction(u32 address) override;
157
158 [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
159
160 [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
161
162 [[nodiscard]] u32 LocalMemorySize() const override;
163
164 [[nodiscard]] u32 SharedMemorySize() const override;
165
166 [[nodiscard]] u32 TextureBoundBuffer() const override;
167
168 [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
169
170private:
171 std::unique_ptr<u64[]> code;
172 std::unordered_map<u32, Shader::TextureType> texture_types;
173 std::unordered_map<u64, u32> cbuf_values;
174 std::array<u32, 3> workgroup_size{};
175 u32 local_memory_size{};
176 u32 shared_memory_size{};
177 u32 texture_bound{};
178 u32 read_lowest{};
179 u32 read_highest{};
180};
181
182void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
183 const std::filesystem::path& filename);
184
185template <typename Key, typename Envs>
186void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) {
187 static_assert(std::is_trivially_copyable_v<Key>);
188 static_assert(std::has_unique_object_representations_v<Key>);
189 SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
190 std::span(envs.data(), envs.size()), filename);
191}
192
193void LoadPipelines(
194 std::stop_token stop_loading, const std::filesystem::path& filename,
195 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
196 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
197
198} // namespace VideoCommon
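
Because the key is written as raw bytes, the templated overload requires a trivially copyable key type with no padding. A hedged usage sketch; ExamplePipelineKey and SaveExample are hypothetical stand-ins for a backend's real pipeline key:

#include <array>
#include <filesystem>
#include <vector>

#include "video_core/shader_environment.h"

// Hypothetical key: u64 members only, so it is trivially copyable and has
// unique object representations (no padding), satisfying both static_asserts.
struct ExamplePipelineKey {
    std::array<u64, 6> unique_hashes;
    u64 state_hash;
};

void SaveExample(const ExamplePipelineKey& key,
                 const std::vector<const VideoCommon::GenericEnvironment*>& envs,
                 const std::filesystem::path& path) {
    // The key bytes are appended after the environments; padding inside the
    // key would leak uninitialized bytes into the file, hence the
    // has_unique_object_representations_v check.
    VideoCommon::SerializePipeline(key, envs, path);
}
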