summaryrefslogtreecommitdiff
path: root/src/video_core/shader_environment.cpp
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-04-26 03:53:26 -0300
committerGravatar ameerj2021-07-22 21:51:29 -0400
commit025b20f96ae588777e3ff11083cc4184bf418af6 (patch)
tree7cda9932a219409196adfc8a8d7d5793840657c1 /src/video_core/shader_environment.cpp
parentvulkan: Defer descriptor set work to the Vulkan thread (diff)
downloadyuzu-025b20f96ae588777e3ff11083cc4184bf418af6.tar.gz
yuzu-025b20f96ae588777e3ff11083cc4184bf418af6.tar.xz
yuzu-025b20f96ae588777e3ff11083cc4184bf418af6.zip
shader: Move pipeline cache logic to separate files
Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL.
Diffstat (limited to 'src/video_core/shader_environment.cpp')
-rw-r--r--src/video_core/shader_environment.cpp453
1 files changed, 453 insertions, 0 deletions
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
new file mode 100644
index 000000000..5dccc0097
--- /dev/null
+++ b/src/video_core/shader_environment.cpp
@@ -0,0 +1,453 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <limits>
#include <memory>
#include <optional>
#include <utility>

#include "common/assert.h"
#include "common/cityhash.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/fs/fs.h"
#include "common/logging/log.h"
#include "shader_recompiler/environment.h"
#include "video_core/memory_manager.h"
#include "video_core/shader_environment.h"
#include "video_core/textures/texture.h"
21
22namespace VideoCommon {
23
// Magic bytes identifying a yuzu pipeline cache file ("yuzucach").
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
// Bump whenever the on-disk layout changes; mismatching caches are deleted on load.
constexpr u32 CACHE_VERSION = 3;

// Size in bytes of a single Maxwell shader instruction word.
constexpr size_t INST_SIZE = sizeof(u64);

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
30
31static u64 MakeCbufKey(u32 index, u32 offset) {
32 return (static_cast<u64>(index) << 32) | offset;
33}
34
35static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
36 switch (entry.texture_type) {
37 case Tegra::Texture::TextureType::Texture1D:
38 return Shader::TextureType::Color1D;
39 case Tegra::Texture::TextureType::Texture2D:
40 case Tegra::Texture::TextureType::Texture2DNoMipmap:
41 return Shader::TextureType::Color2D;
42 case Tegra::Texture::TextureType::Texture3D:
43 return Shader::TextureType::Color3D;
44 case Tegra::Texture::TextureType::TextureCubemap:
45 return Shader::TextureType::ColorCube;
46 case Tegra::Texture::TextureType::Texture1DArray:
47 return Shader::TextureType::ColorArray1D;
48 case Tegra::Texture::TextureType::Texture2DArray:
49 return Shader::TextureType::ColorArray2D;
50 case Tegra::Texture::TextureType::Texture1DBuffer:
51 return Shader::TextureType::Buffer;
52 case Tegra::Texture::TextureType::TextureCubeArray:
53 return Shader::TextureType::ColorArrayCube;
54 default:
55 throw Shader::NotImplementedException("Unknown texture type");
56 }
57}
58
// Base environment over guest GPU memory shared by graphics and compute
// shaders. program_base_ is the base of the shader program region and
// start_address_ is the entry point offset within it.
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
                                       u32 start_address_)
    : gpu_memory{&gpu_memory_}, program_base{program_base_} {
    // start_address is assigned in the body rather than the initializer list --
    // presumably it is inherited from the Shader::Environment base (TODO confirm
    // against the header).
    start_address = start_address_;
}

GenericEnvironment::~GenericEnvironment() = default;
66
// Index of the constant buffer that holds bound texture handles.
u32 GenericEnvironment::TextureBoundBuffer() const {
    return texture_bound;
}

// Local memory size in bytes required by the shader.
u32 GenericEnvironment::LocalMemorySize() const {
    return local_memory_size;
}

// Shared memory size in bytes (only meaningful for compute shaders).
u32 GenericEnvironment::SharedMemorySize() const {
    return shared_memory_size;
}

// Workgroup dimensions {x, y, z} (only meaningful for compute shaders).
std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
    return workgroup_size;
}
82
83u64 GenericEnvironment::ReadInstruction(u32 address) {
84 read_lowest = std::min(read_lowest, address);
85 read_highest = std::max(read_highest, address);
86
87 if (address >= cached_lowest && address < cached_highest) {
88 return code[(address - cached_lowest) / INST_SIZE];
89 }
90 has_unbound_instructions = true;
91 return gpu_memory->Read<u64>(program_base + address);
92}
93
// Tries to determine the shader's size by scanning for its self-branch
// terminator. On success, marks the scanned range as cached (TryFindSize has
// already filled `code` as a side effect) and returns a CityHash64 of the
// program bytes; returns nullopt when no terminator was found.
std::optional<u64> GenericEnvironment::Analyze() {
    const std::optional<u64> size{TryFindSize()};
    if (!size) {
        return std::nullopt;
    }
    cached_lowest = start_address;
    cached_highest = start_address + static_cast<u32>(*size);
    return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
}
103
104void GenericEnvironment::SetCachedSize(size_t size_bytes) {
105 cached_lowest = start_address;
106 cached_highest = start_address + static_cast<u32>(size_bytes);
107 code.resize(CachedSize());
108 gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
109}
110
// Size in bytes of the cached range; both bounds address instructions, so one
// extra instruction is added to make the range inclusive.
size_t GenericEnvironment::CachedSize() const noexcept {
    return cached_highest - cached_lowest + INST_SIZE;
}

// Size in bytes of the range actually touched through ReadInstruction.
size_t GenericEnvironment::ReadSize() const noexcept {
    return read_highest - read_lowest + INST_SIZE;
}

// An environment can only be serialized when every executed instruction was
// read from the cached range (see ReadInstruction).
bool GenericEnvironment::CanBeSerialized() const noexcept {
    return !has_unbound_instructions;
}
122
123u64 GenericEnvironment::CalculateHash() const {
124 const size_t size{ReadSize()};
125 const auto data{std::make_unique<char[]>(size)};
126 gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
127 return Common::CityHash64(data.get(), size);
128}
129
// Writes this environment to the pipeline cache file.
// On-disk layout (all fields raw, host-endian): code_size, num_texture_types,
// num_cbuf_values, local_memory_size, texture_bound, start_address,
// cached_lowest, cached_highest, stage, the raw code bytes, the cached
// texture-type pairs, the cached cbuf key/value pairs, and finally the
// stage-specific tail (workgroup size + shared memory for compute, SPH
// otherwise). FileEnvironment::Deserialize must read exactly this order.
void GenericEnvironment::Serialize(std::ofstream& file) const {
    const u64 code_size{static_cast<u64>(CachedSize())};
    const u64 num_texture_types{static_cast<u64>(texture_types.size())};
    const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};

    file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
        .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
        .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
        .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
        .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
        .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
        .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
        .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
        .write(reinterpret_cast<const char*>(&stage), sizeof(stage))
        .write(reinterpret_cast<const char*>(code.data()), code_size);
    for (const auto [key, type] : texture_types) {
        file.write(reinterpret_cast<const char*>(&key), sizeof(key))
            .write(reinterpret_cast<const char*>(&type), sizeof(type));
    }
    for (const auto [key, type] : cbuf_values) {
        file.write(reinterpret_cast<const char*>(&key), sizeof(key))
            .write(reinterpret_cast<const char*>(&type), sizeof(type));
    }
    if (stage == Shader::Stage::Compute) {
        file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
            .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
    } else {
        // Graphics shaders store the shader program header instead.
        file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
    }
}
160
// Scans guest memory one 0x1000-byte block at a time (up to 1 MiB) looking for
// the self-branch instruction that terminates Maxwell shader programs.
// Side effect: grows `code` (in u64 elements) with everything read so far, so
// on success the program contents are already cached.
// Returns the byte offset of the terminator, or nullopt if none was found.
std::optional<u64> GenericEnvironment::TryFindSize() {
    static constexpr size_t BLOCK_SIZE = 0x1000;
    static constexpr size_t MAXIMUM_SIZE = 0x100000;

    // Raw encodings of `BRA $` (branch-to-self), used as end-of-shader markers.
    static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
    static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;

    GPUVAddr guest_addr{program_base + start_address};
    size_t offset{0};
    size_t size{BLOCK_SIZE};
    while (size <= MAXIMUM_SIZE) {
        // Grow the cache, then read the next block after the data scanned so far.
        // `data` must be computed after resize() because resize may reallocate.
        code.resize(size / INST_SIZE);
        u64* const data = code.data() + offset / INST_SIZE;
        gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
        for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
            const u64 inst = data[index / INST_SIZE];
            if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
                return offset + index;
            }
        }
        guest_addr += BLOCK_SIZE;
        size += BLOCK_SIZE;
        offset += BLOCK_SIZE;
    }
    return std::nullopt;
}
187
// Reads the TIC descriptor referenced by a raw texture handle from guest
// memory, converts it to an IR texture type, and caches the result so it can
// be serialized later.
// NOTE(review): tic_limit is currently unused -- presumably intended to bound
// handle.image against the descriptor table size; confirm before relying on
// out-of-range handles being rejected here.
Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
                                                            bool via_header_index, u32 raw) {
    const TextureHandle handle{raw, via_header_index};
    const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
    Tegra::Texture::TICEntry entry;
    gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
    const Shader::TextureType result{ConvertType(entry)};
    texture_types.emplace(raw, result);
    return result;
}
198
// Builds an environment for one graphics shader stage: reads the shader
// program header (SPH) from guest memory and derives the IR stage, the
// Maxwell stage index, the local memory size, and the bound-texture constant
// buffer index from the 3D engine registers.
GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
                                         Tegra::MemoryManager& gpu_memory_,
                                         Maxwell::ShaderProgram program, GPUVAddr program_base_,
                                         u32 start_address_)
    : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
    // The SPH sits at the very start of the program.
    gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
    switch (program) {
    case Maxwell::ShaderProgram::VertexA:
        stage = Shader::Stage::VertexA;
        stage_index = 0;
        break;
    case Maxwell::ShaderProgram::VertexB:
        stage = Shader::Stage::VertexB;
        // VertexA and VertexB share the same hardware stage slot.
        stage_index = 0;
        break;
    case Maxwell::ShaderProgram::TesselationControl:
        stage = Shader::Stage::TessellationControl;
        stage_index = 1;
        break;
    case Maxwell::ShaderProgram::TesselationEval:
        stage = Shader::Stage::TessellationEval;
        stage_index = 2;
        break;
    case Maxwell::ShaderProgram::Geometry:
        stage = Shader::Stage::Geometry;
        stage_index = 3;
        break;
    case Maxwell::ShaderProgram::Fragment:
        stage = Shader::Stage::Fragment;
        stage_index = 4;
        break;
    default:
        UNREACHABLE_MSG("Invalid program={}", program);
        break;
    }
    // The SPH reports local memory as a 64-bit value; the environment stores u32.
    const u64 local_size{sph.LocalMemorySize()};
    ASSERT(local_size <= std::numeric_limits<u32>::max());
    local_memory_size = static_cast<u32>(local_size);
    texture_bound = maxwell3d->regs.tex_cb_index;
}
239
240u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
241 const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
242 ASSERT(cbuf.enabled);
243 u32 value{};
244 if (cbuf_offset < cbuf.size) {
245 value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
246 }
247 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
248 return value;
249}
250
251Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
252 const auto& regs{maxwell3d->regs};
253 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
254 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
255}
256
// Builds an environment for a compute shader from the Kepler compute engine's
// launch description (QMD): stage, local/shared memory sizes, workgroup
// dimensions, and the bound-texture constant buffer index.
ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
                                       Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
                                       u32 start_address_)
    : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
                                                                         &kepler_compute_} {
    const auto& qmd{kepler_compute->launch_description};
    stage = Shader::Stage::Compute;
    local_memory_size = qmd.local_pos_alloc;
    texture_bound = kepler_compute->regs.tex_cb_index;
    shared_memory_size = qmd.shared_alloc;
    workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
}
269
270u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
271 const auto& qmd{kepler_compute->launch_description};
272 ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
273 const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
274 u32 value{};
275 if (cbuf_offset < cbuf.size) {
276 value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
277 }
278 cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
279 return value;
280}
281
282Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
283 const auto& regs{kepler_compute->regs};
284 const auto& qmd{kepler_compute->launch_description};
285 return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
286}
287
// Loads an environment from the pipeline cache file.
// Must read fields in exactly the order GenericEnvironment::Serialize writes
// them; the serialized cached_lowest/cached_highest become this environment's
// read_lowest/read_highest.
void FileEnvironment::Deserialize(std::ifstream& file) {
    u64 code_size{};
    u64 num_texture_types{};
    u64 num_cbuf_values{};
    file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
        .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
        .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
        .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
        .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
        .read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
        .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
        .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
        .read(reinterpret_cast<char*>(&stage), sizeof(stage));
    // DivCeil: code_size is a byte count but storage is u64 instruction words.
    code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
    file.read(reinterpret_cast<char*>(code.get()), code_size);
    for (size_t i = 0; i < num_texture_types; ++i) {
        u32 key;
        Shader::TextureType type;
        file.read(reinterpret_cast<char*>(&key), sizeof(key))
            .read(reinterpret_cast<char*>(&type), sizeof(type));
        texture_types.emplace(key, type);
    }
    for (size_t i = 0; i < num_cbuf_values; ++i) {
        u64 key;
        u32 value;
        file.read(reinterpret_cast<char*>(&key), sizeof(key))
            .read(reinterpret_cast<char*>(&value), sizeof(value));
        cbuf_values.emplace(key, value);
    }
    if (stage == Shader::Stage::Compute) {
        file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
            .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
    } else {
        // Graphics shaders store the shader program header instead.
        file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
    }
}
324
325u64 FileEnvironment::ReadInstruction(u32 address) {
326 if (address < read_lowest || address > read_highest) {
327 throw Shader::LogicError("Out of bounds address {}", address);
328 }
329 return code[(address - read_lowest) / sizeof(u64)];
330}
331
332u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
333 const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
334 if (it == cbuf_values.end()) {
335 throw Shader::LogicError("Uncached read texture type");
336 }
337 return it->second;
338}
339
340Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
341 const auto it{texture_types.find(handle)};
342 if (it == texture_types.end()) {
343 throw Shader::LogicError("Uncached read texture type");
344 }
345 return it->second;
346}
347
// Local memory size in bytes, as recorded in the cache file.
u32 FileEnvironment::LocalMemorySize() const {
    return local_memory_size;
}

// Shared memory size in bytes (only meaningful for compute shaders).
u32 FileEnvironment::SharedMemorySize() const {
    return shared_memory_size;
}

// Index of the constant buffer that holds bound texture handles.
u32 FileEnvironment::TextureBoundBuffer() const {
    return texture_bound;
}

// Workgroup dimensions {x, y, z} (only meaningful for compute shaders).
std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
    return workgroup_size;
}
363
364void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
365 const std::filesystem::path& filename) try {
366 std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
367 file.exceptions(std::ifstream::failbit);
368 if (!file.is_open()) {
369 LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
370 Common::FS::PathToUTF8String(filename));
371 return;
372 }
373 if (file.tellp() == 0) {
374 // Write header
375 file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
376 .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
377 }
378 if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
379 return;
380 }
381 const u32 num_envs{static_cast<u32>(envs.size())};
382 file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
383 for (const GenericEnvironment* const env : envs) {
384 env->Serialize(file);
385 }
386 file.write(key.data(), key.size_bytes());
387
388} catch (const std::ios_base::failure& e) {
389 LOG_ERROR(Common_Filesystem, "{}", e.what());
390 if (!Common::FS::RemoveFile(filename)) {
391 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
392 Common::FS::PathToUTF8String(filename));
393 }
394}
395
396void LoadPipelines(
397 std::stop_token stop_loading, const std::filesystem::path& filename,
398 Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
399 Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
400 std::ifstream file(filename, std::ios::binary | std::ios::ate);
401 if (!file.is_open()) {
402 return;
403 }
404 file.exceptions(std::ifstream::failbit);
405 const auto end{file.tellg()};
406 file.seekg(0, std::ios::beg);
407
408 std::array<char, 8> magic_number;
409 u32 cache_version;
410 file.read(magic_number.data(), magic_number.size())
411 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
412 if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
413 file.close();
414 if (Common::FS::RemoveFile(filename)) {
415 if (magic_number != MAGIC_NUMBER) {
416 LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
417 }
418 if (cache_version != CACHE_VERSION) {
419 LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
420 }
421 } else {
422 LOG_ERROR(Common_Filesystem,
423 "Invalid pipeline cache file and failed to delete it in \"{}\"",
424 Common::FS::PathToUTF8String(filename));
425 }
426 return;
427 }
428 while (file.tellg() != end) {
429 if (stop_loading.stop_requested()) {
430 return;
431 }
432 u32 num_envs{};
433 file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
434 std::vector<FileEnvironment> envs(num_envs);
435 for (FileEnvironment& env : envs) {
436 env.Deserialize(file);
437 }
438 if (envs.front().ShaderStage() == Shader::Stage::Compute) {
439 load_compute(file, std::move(envs.front()));
440 } else {
441 load_graphics(file, std::move(envs));
442 }
443 }
444
445} catch (const std::ios_base::failure& e) {
446 LOG_ERROR(Common_Filesystem, "{}", e.what());
447 if (!Common::FS::RemoveFile(filename)) {
448 LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
449 Common::FS::PathToUTF8String(filename));
450 }
451}
452
453} // namespace VideoCommon