summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp489
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h66
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp70
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp280
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h32
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp94
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h41
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp8
-rw-r--r--src/video_core/shader/const_buffer_locker.h4
-rw-r--r--src/video_core/shader/control_flow.cpp18
-rw-r--r--src/video_core/shader/control_flow.h3
-rw-r--r--src/video_core/shader/decode.cpp9
-rw-r--r--src/video_core/shader/shader_ir.cpp7
-rw-r--r--src/video_core/shader/shader_ir.h12
15 files changed, 420 insertions, 722 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 9e2799876..6402d6763 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,10 +3,12 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <mutex>
6#include <optional>
7#include <string>
6#include <thread> 8#include <thread>
9#include <unordered_set>
7#include <boost/functional/hash.hpp> 10#include <boost/functional/hash.hpp>
8#include "common/assert.h" 11#include "common/assert.h"
9#include "common/hash.h"
10#include "common/scope_exit.h" 12#include "common/scope_exit.h"
11#include "core/core.h" 13#include "core/core.h"
12#include "core/frontend/emu_window.h" 14#include "core/frontend/emu_window.h"
@@ -22,18 +24,20 @@
22 24
23namespace OpenGL { 25namespace OpenGL {
24 26
27using Tegra::Engines::ShaderType;
28using VideoCommon::Shader::ConstBufferLocker;
25using VideoCommon::Shader::ProgramCode; 29using VideoCommon::Shader::ProgramCode;
30using VideoCommon::Shader::ShaderIR;
31
32namespace {
26 33
27// One UBO is always reserved for emulation values on staged shaders 34// One UBO is always reserved for emulation values on staged shaders
28constexpr u32 STAGE_RESERVED_UBOS = 1; 35constexpr u32 STAGE_RESERVED_UBOS = 1;
29 36
30struct UnspecializedShader { 37constexpr u32 STAGE_MAIN_OFFSET = 10;
31 std::string code; 38constexpr u32 KERNEL_MAIN_OFFSET = 0;
32 GLShader::ShaderEntries entries;
33 ProgramType program_type;
34};
35 39
36namespace { 40constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
37 41
38/// Gets the address for the specified shader stage program 42/// Gets the address for the specified shader stage program
39GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { 43GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -42,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
42 return gpu.regs.code_address.CodeAddress() + shader_config.offset; 46 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
43} 47}
44 48
49/// Gets if the current instruction offset is a scheduler instruction
50constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
51 // Sched instructions appear once every 4 instructions.
52 constexpr std::size_t SchedPeriod = 4;
53 const std::size_t absolute_offset = offset - main_offset;
54 return (absolute_offset % SchedPeriod) == 0;
55}
56
57/// Calculates the size of a program stream
58std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
59 constexpr std::size_t start_offset = 10;
60 // This is the encoded version of BRA that jumps to itself. All Nvidia
61 // shaders end with one.
62 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
63 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
64 std::size_t offset = start_offset;
65 while (offset < program.size()) {
66 const u64 instruction = program[offset];
67 if (!IsSchedInstruction(offset, start_offset)) {
68 if ((instruction & mask) == self_jumping_branch) {
69 // End on Maxwell's "nop" instruction
70 break;
71 }
72 if (instruction == 0) {
73 break;
74 }
75 }
76 offset++;
77 }
78 // The last instruction is included in the program size
79 return std::min(offset + 1, program.size());
80}
81
45/// Gets the shader program code from memory for the specified address 82/// Gets the shader program code from memory for the specified address
46ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, 83ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
47 const u8* host_ptr) { 84 const u8* host_ptr) {
@@ -52,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
52 }); 89 });
53 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), 90 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
54 program_code.size() * sizeof(u64)); 91 program_code.size() * sizeof(u64));
92 program_code.resize(CalculateProgramSize(program_code));
55 return program_code; 93 return program_code;
56} 94}
57 95
@@ -72,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
72 } 110 }
73} 111}
74 112
75/// Gets if the current instruction offset is a scheduler instruction
76constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
77 // Sched instructions appear once every 4 instructions.
78 constexpr std::size_t SchedPeriod = 4;
79 const std::size_t absolute_offset = offset - main_offset;
80 return (absolute_offset % SchedPeriod) == 0;
81}
82
83/// Describes primitive behavior on geometry shaders 113/// Describes primitive behavior on geometry shaders
84constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { 114constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
85 switch (primitive_mode) { 115 switch (primitive_mode) {
@@ -122,122 +152,114 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
122 return {}; 152 return {};
123} 153}
124 154
125/// Calculates the size of a program stream
126std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
127 constexpr std::size_t start_offset = 10;
128 // This is the encoded version of BRA that jumps to itself. All Nvidia
129 // shaders end with one.
130 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
131 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
132 std::size_t offset = start_offset;
133 std::size_t size = start_offset * sizeof(u64);
134 while (offset < program.size()) {
135 const u64 instruction = program[offset];
136 if (!IsSchedInstruction(offset, start_offset)) {
137 if ((instruction & mask) == self_jumping_branch) {
138 // End on Maxwell's "nop" instruction
139 break;
140 }
141 if (instruction == 0) {
142 break;
143 }
144 }
145 size += sizeof(u64);
146 offset++;
147 }
148 // The last instruction is included in the program size
149 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
150}
151
152/// Hashes one (or two) program streams 155/// Hashes one (or two) program streams
153u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, 156u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
154 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { 157 const ProgramCode& code_b) {
155 if (size_a == 0) { 158 u64 unique_identifier = boost::hash_value(code);
156 size_a = CalculateProgramSize(code); 159 if (program_type == ProgramType::VertexA) {
157 } 160 // VertexA programs include two programs
158 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); 161 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
159 if (program_type != ProgramType::VertexA) {
160 return unique_identifier;
161 }
162 // VertexA programs include two programs
163
164 std::size_t seed = 0;
165 boost::hash_combine(seed, unique_identifier);
166
167 if (size_b == 0) {
168 size_b = CalculateProgramSize(code_b);
169 } 162 }
170 const u64 identifier_b = 163 return unique_identifier;
171 Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
172 boost::hash_combine(seed, identifier_b);
173 return static_cast<u64>(seed);
174} 164}
175 165
176/// Creates an unspecialized program from code streams 166/// Creates an unspecialized program from code streams
177GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device, 167std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
178 ProgramType program_type, ProgramCode program_code, 168 const std::optional<ShaderIR>& ir_b) {
179 ProgramCode program_code_b) {
180 GLShader::ShaderSetup setup(program_code);
181 setup.program.size_a = CalculateProgramSize(program_code);
182 setup.program.size_b = 0;
183 if (program_type == ProgramType::VertexA) {
184 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
185 // Conventional HW does not support this, so we combine VertexA and VertexB into one
186 // stage here.
187 setup.SetProgramB(program_code_b);
188 setup.program.size_b = CalculateProgramSize(program_code_b);
189 }
190 setup.program.unique_identifier = GetUniqueIdentifier(
191 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
192
193 switch (program_type) { 169 switch (program_type) {
194 case ProgramType::VertexA: 170 case ProgramType::VertexA:
195 case ProgramType::VertexB: { 171 case ProgramType::VertexB:
196 VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex, 172 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
197 &(system.GPU().Maxwell3D())}; 173 case ProgramType::Geometry:
198 return GLShader::GenerateVertexShader(locker, device, setup); 174 return GLShader::GenerateGeometryShader(device, ir);
199 } 175 case ProgramType::Fragment:
200 case ProgramType::Geometry: { 176 return GLShader::GenerateFragmentShader(device, ir);
201 VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry, 177 case ProgramType::Compute:
202 &(system.GPU().Maxwell3D())}; 178 return GLShader::GenerateComputeShader(device, ir);
203 return GLShader::GenerateGeometryShader(locker, device, setup);
204 }
205 case ProgramType::Fragment: {
206 VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment,
207 &(system.GPU().Maxwell3D())};
208 return GLShader::GenerateFragmentShader(locker, device, setup);
209 }
210 case ProgramType::Compute: {
211 VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())};
212 return GLShader::GenerateComputeShader(locker, device, setup);
213 }
214 default: 179 default:
215 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); 180 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
216 return {}; 181 return {};
217 } 182 }
218} 183}
219 184
220CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 185constexpr const char* GetProgramTypeName(ProgramType program_type) {
221 ProgramType program_type, const ProgramVariant& variant, 186 switch (program_type) {
222 bool hint_retrievable = false) { 187 case ProgramType::VertexA:
188 case ProgramType::VertexB:
189 return "VS";
190 case ProgramType::TessellationControl:
191 return "TCS";
192 case ProgramType::TessellationEval:
193 return "TES";
194 case ProgramType::Geometry:
195 return "GS";
196 case ProgramType::Fragment:
197 return "FS";
198 case ProgramType::Compute:
199 return "CS";
200 }
201 return "UNK";
202}
203
204Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
205 switch (program_type) {
206 case ProgramType::VertexA:
207 case ProgramType::VertexB:
208 return Tegra::Engines::ShaderType::Vertex;
209 case ProgramType::TessellationControl:
210 return Tegra::Engines::ShaderType::TesselationControl;
211 case ProgramType::TessellationEval:
212 return Tegra::Engines::ShaderType::TesselationEval;
213 case ProgramType::Geometry:
214 return Tegra::Engines::ShaderType::Geometry;
215 case ProgramType::Fragment:
216 return Tegra::Engines::ShaderType::Fragment;
217 case ProgramType::Compute:
218 return Tegra::Engines::ShaderType::Compute;
219 }
220 UNREACHABLE();
221 return {};
222}
223
224std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
225 return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
226}
227
228CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
229 const ProgramCode& program_code, const ProgramCode& program_code_b,
230 const ProgramVariant& variant, ConstBufferLocker& locker,
231 bool hint_retrievable = false) {
232 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
233
234 const bool is_compute = program_type == ProgramType::Compute;
235 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
236 const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
237 std::optional<ShaderIR> ir_b;
238 if (!program_code_b.empty()) {
239 ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
240 }
241 const auto entries = GLShader::GetEntries(ir);
242
223 auto base_bindings{variant.base_bindings}; 243 auto base_bindings{variant.base_bindings};
224 const auto primitive_mode{variant.primitive_mode}; 244 const auto primitive_mode{variant.primitive_mode};
225 const auto texture_buffer_usage{variant.texture_buffer_usage}; 245 const auto texture_buffer_usage{variant.texture_buffer_usage};
226 246
227 std::string source = R"(#version 430 core 247 std::string source = fmt::format(R"(// {}
248#version 430 core
228#extension GL_ARB_separate_shader_objects : enable 249#extension GL_ARB_separate_shader_objects : enable
229#extension GL_ARB_shader_viewport_layer_array : enable 250#extension GL_ARB_shader_viewport_layer_array : enable
230#extension GL_EXT_shader_image_load_formatted : enable 251#extension GL_EXT_shader_image_load_formatted : enable
231#extension GL_NV_gpu_shader5 : enable 252#extension GL_NV_gpu_shader5 : enable
232#extension GL_NV_shader_thread_group : enable 253#extension GL_NV_shader_thread_group : enable
233#extension GL_NV_shader_thread_shuffle : enable 254#extension GL_NV_shader_thread_shuffle : enable
234)"; 255)",
235 if (program_type == ProgramType::Compute) { 256 GetShaderId(unique_identifier, program_type));
257 if (is_compute) {
236 source += "#extension GL_ARB_compute_variable_group_size : require\n"; 258 source += "#extension GL_ARB_compute_variable_group_size : require\n";
237 } 259 }
238 source += '\n'; 260 source += '\n';
239 261
240 if (program_type != ProgramType::Compute) { 262 if (!is_compute) {
241 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 263 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
242 } 264 }
243 265
@@ -281,7 +303,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
281 } 303 }
282 304
283 source += '\n'; 305 source += '\n';
284 source += code; 306 source += GenerateGLSL(device, program_type, ir, ir_b);
285 307
286 OGLShader shader; 308 OGLShader shader;
287 shader.Create(source.c_str(), GetShaderType(program_type)); 309 shader.Create(source.c_str(), GetShaderType(program_type));
@@ -291,85 +313,86 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
291 return program; 313 return program;
292} 314}
293 315
294std::set<GLenum> GetSupportedFormats() { 316std::unordered_set<GLenum> GetSupportedFormats() {
295 std::set<GLenum> supported_formats;
296
297 GLint num_formats{}; 317 GLint num_formats{};
298 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 318 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
299 319
300 std::vector<GLint> formats(num_formats); 320 std::vector<GLint> formats(num_formats);
301 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); 321 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
302 322
303 for (const GLint format : formats) 323 std::unordered_set<GLenum> supported_formats;
324 for (const GLint format : formats) {
304 supported_formats.insert(static_cast<GLenum>(format)); 325 supported_formats.insert(static_cast<GLenum>(format));
326 }
305 return supported_formats; 327 return supported_formats;
306} 328}
307 329
308} // Anonymous namespace 330} // Anonymous namespace
309 331
310CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, 332CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
311 GLShader::ProgramResult result) 333 GLShader::ShaderEntries entries, ProgramCode program_code,
312 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, 334 ProgramCode program_code_b)
313 unique_identifier{params.unique_identifier}, program_type{program_type}, 335 : RasterizerCacheObject{params.host_ptr}, system{params.system},
314 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, 336 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
315 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} 337 unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
338 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
339 if (params.precompiled_variants) {
340 for (const auto& pair : *params.precompiled_variants) {
341 const auto& variant = pair->first.variant;
342 programs.emplace(variant, pair->second);
343 }
344 }
345}
316 346
317Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 347Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
318 Maxwell::ShaderProgram program_type, 348 Maxwell::ShaderProgram program_type,
319 ProgramCode&& program_code, 349 ProgramCode program_code, ProgramCode program_code_b) {
320 ProgramCode&& program_code_b) {
321 const auto code_size{CalculateProgramSize(program_code)};
322 const auto code_size_b{CalculateProgramSize(program_code_b)};
323 auto result{CreateProgram(params.system, params.device, GetProgramType(program_type),
324 program_code, program_code_b)};
325 if (result.first.empty()) {
326 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
327 return {};
328 }
329
330 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 350 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
331 params.unique_identifier, GetProgramType(program_type), 351 params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
332 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), 352
333 std::move(program_code), std::move(program_code_b))); 353 ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)));
334 354 const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
335 return std::shared_ptr<CachedShader>( 355 // TODO(Rodrigo): Handle VertexA shaders
336 new CachedShader(params, GetProgramType(program_type), std::move(result))); 356 // std::optional<ShaderIR> ir_b;
337} 357 // if (!program_code_b.empty()) {
338 358 // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
339Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, 359 // }
340 Maxwell::ShaderProgram program_type,
341 GLShader::ProgramResult result) {
342 return std::shared_ptr<CachedShader>( 360 return std::shared_ptr<CachedShader>(
343 new CachedShader(params, GetProgramType(program_type), std::move(result))); 361 new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
362 std::move(program_code), std::move(program_code_b)));
344} 363}
345 364
346Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { 365Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
347 auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})}; 366 params.disk_cache.SaveRaw(
367 ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
348 368
349 const auto code_size{CalculateProgramSize(code)}; 369 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute);
350 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, 370 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
351 static_cast<u32>(code_size / sizeof(u64)), 0, 371 return std::shared_ptr<CachedShader>(new CachedShader(
352 std::move(code), {})); 372 params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
353
354 return std::shared_ptr<CachedShader>(
355 new CachedShader(params, ProgramType::Compute, std::move(result)));
356} 373}
357 374
358Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, 375Shader CachedShader::CreateFromCache(const ShaderParameters& params,
359 GLShader::ProgramResult result) { 376 const UnspecializedShader& unspecialized) {
360 return std::shared_ptr<CachedShader>( 377 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
361 new CachedShader(params, ProgramType::Compute, std::move(result))); 378 unspecialized.entries, unspecialized.code,
379 unspecialized.code_b));
362} 380}
363 381
364std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 382std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
365 const auto [entry, is_cache_miss] = programs.try_emplace(variant); 383 const auto [entry, is_cache_miss] = programs.try_emplace(variant);
366 auto& program = entry->second; 384 auto& program = entry->second;
367 if (is_cache_miss) { 385 if (is_cache_miss) {
368 program = TryLoadProgram(variant); 386 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
369 if (!program) { 387 if (program_type == ProgramType::Compute) {
370 program = SpecializeShader(code, entries, program_type, variant); 388 engine = &system.GPU().KeplerCompute();
371 disk_cache.SaveUsage(GetUsage(variant)); 389 } else {
390 engine = &system.GPU().Maxwell3D();
372 } 391 }
392 ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine);
393 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
394 variant, locker);
395 disk_cache.SaveUsage(GetUsage(variant));
373 396
374 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); 397 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
375 } 398 }
@@ -385,14 +408,6 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
385 return {program->handle, base_bindings}; 408 return {program->handle, base_bindings};
386} 409}
387 410
388CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
389 const auto found = precompiled_programs.find(GetUsage(variant));
390 if (found == precompiled_programs.end()) {
391 return {};
392 }
393 return found->second;
394}
395
396ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { 411ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
397 ShaderDiskCacheUsage usage; 412 ShaderDiskCacheUsage usage;
398 usage.unique_identifier = unique_identifier; 413 usage.unique_identifier = unique_identifier;
@@ -412,18 +427,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
412 return; 427 return;
413 } 428 }
414 const auto [raws, shader_usages] = *transferable; 429 const auto [raws, shader_usages] = *transferable;
415 430 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
416 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
417
418 const auto supported_formats{GetSupportedFormats()};
419 const auto unspecialized_shaders{
420 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
421 if (stop_loading) {
422 return; 431 return;
423 } 432 }
424 433
425 // Track if precompiled cache was altered during loading to know if we have to serialize the 434 const auto dumps = disk_cache.LoadPrecompiled();
426 // virtual precompiled cache file back to the hard drive 435 const auto supported_formats = GetSupportedFormats();
436
437 // Track if precompiled cache was altered during loading to know if we have to
438 // serialize the virtual precompiled cache file back to the hard drive
427 bool precompiled_cache_altered = false; 439 bool precompiled_cache_altered = false;
428 440
429 // Inform the frontend about shader build initialization 441 // Inform the frontend about shader build initialization
@@ -446,9 +458,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
446 return; 458 return;
447 } 459 }
448 const auto& usage{shader_usages[i]}; 460 const auto& usage{shader_usages[i]};
449 LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
450 usage.unique_identifier, i, shader_usages.size());
451
452 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 461 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
453 const auto dump{dumps.find(usage)}; 462 const auto dump{dumps.find(usage)};
454 463
@@ -462,21 +471,27 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
462 } 471 }
463 } 472 }
464 if (!shader) { 473 if (!shader) {
465 shader = SpecializeShader(unspecialized.code, unspecialized.entries, 474 ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type));
466 unspecialized.program_type, usage.variant, true); 475 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
476 unspecialized.code, unspecialized.code_b, usage.variant,
477 locker, true);
467 } 478 }
468 479
469 std::scoped_lock lock(mutex); 480 std::scoped_lock lock{mutex};
470 if (callback) { 481 if (callback) {
471 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 482 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
472 shader_usages.size()); 483 shader_usages.size());
473 } 484 }
474 485
475 precompiled_programs.emplace(usage, std::move(shader)); 486 precompiled_programs.emplace(usage, std::move(shader));
487
488 // TODO(Rodrigo): Is there a better way to do this?
489 precompiled_variants[usage.unique_identifier].push_back(
490 precompiled_programs.find(usage));
476 } 491 }
477 }; 492 };
478 493
479 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; 494 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
480 const std::size_t bucket_size{shader_usages.size() / num_workers}; 495 const std::size_t bucket_size{shader_usages.size() / num_workers};
481 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 496 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
482 std::vector<std::thread> threads(num_workers); 497 std::vector<std::thread> threads(num_workers);
@@ -496,7 +511,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
496 if (compilation_failed) { 511 if (compilation_failed) {
497 // Invalidate the precompiled cache if a shader dumped shader was rejected 512 // Invalidate the precompiled cache if a shader dumped shader was rejected
498 disk_cache.InvalidatePrecompiled(); 513 disk_cache.InvalidatePrecompiled();
499 dumps.clear();
500 precompiled_cache_altered = true; 514 precompiled_cache_altered = true;
501 return; 515 return;
502 } 516 }
@@ -504,8 +518,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
504 return; 518 return;
505 } 519 }
506 520
507 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before 521 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
508 // precompiling them 522 // before precompiling them
509 523
510 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 524 for (std::size_t i = 0; i < shader_usages.size(); ++i) {
511 const auto& usage{shader_usages[i]}; 525 const auto& usage{shader_usages[i]};
@@ -521,9 +535,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
521 } 535 }
522} 536}
523 537
524CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( 538const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
525 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { 539 const auto it = precompiled_variants.find(unique_identifier);
540 return it == precompiled_variants.end() ? nullptr : &it->second;
541}
526 542
543CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
544 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
527 if (supported_formats.find(dump.binary_format) == supported_formats.end()) { 545 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
528 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); 546 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
529 return {}; 547 return {};
@@ -545,56 +563,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
545 return shader; 563 return shader;
546} 564}
547 565
548std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( 566bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
549 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 567 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
550 const std::vector<ShaderDiskCacheRaw>& raws, 568 const std::vector<ShaderDiskCacheRaw>& raws) {
551 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
552 std::unordered_map<u64, UnspecializedShader> unspecialized;
553
554 if (callback) { 569 if (callback) {
555 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); 570 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
556 } 571 }
557 572
558 for (std::size_t i = 0; i < raws.size(); ++i) { 573 for (std::size_t i = 0; i < raws.size(); ++i) {
559 if (stop_loading) { 574 if (stop_loading) {
560 return {}; 575 return false;
561 } 576 }
562 const auto& raw{raws[i]}; 577 const auto& raw{raws[i]};
563 const u64 unique_identifier{raw.GetUniqueIdentifier()}; 578 const u64 unique_identifier{raw.GetUniqueIdentifier()};
564 const u64 calculated_hash{ 579 const u64 calculated_hash{
565 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; 580 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
566 if (unique_identifier != calculated_hash) { 581 if (unique_identifier != calculated_hash) {
567 LOG_ERROR( 582 LOG_ERROR(Render_OpenGL,
568 Render_OpenGL, 583 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
569 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", 584 "removing shader cache",
570 raw.GetUniqueIdentifier(), calculated_hash); 585 raw.GetUniqueIdentifier(), calculated_hash);
571 disk_cache.InvalidateTransferable(); 586 disk_cache.InvalidateTransferable();
572 return {}; 587 return false;
573 } 588 }
574 589
575 GLShader::ProgramResult result; 590 const u32 main_offset =
576 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { 591 raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
577 // If it's stored in the precompiled file, avoid decompiling it here 592 ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
578 const auto& stored_decompiled{it->second}; 593 const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
579 result = {stored_decompiled.code, stored_decompiled.entries}; 594 // TODO(Rodrigo): Handle VertexA shaders
580 } else { 595 // std::optional<ShaderIR> ir_b;
581 // Otherwise decompile the shader at boot and save the result to the decompiled file 596 // if (raw.HasProgramA()) {
582 result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(), 597 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
583 raw.GetProgramCodeB()); 598 // }
584 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); 599
585 } 600 UnspecializedShader unspecialized;
586 601 unspecialized.entries = GLShader::GetEntries(ir);
587 precompiled_shaders.insert({unique_identifier, result}); 602 unspecialized.program_type = raw.GetProgramType();
588 603 unspecialized.code = raw.GetProgramCode();
589 unspecialized.insert( 604 unspecialized.code_b = raw.GetProgramCodeB();
590 {raw.GetUniqueIdentifier(), 605 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
591 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
592 606
593 if (callback) { 607 if (callback) {
594 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); 608 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
595 } 609 }
596 } 610 }
597 return unspecialized; 611 return true;
598} 612}
599 613
600Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 614Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -603,37 +617,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
603 } 617 }
604 618
605 auto& memory_manager{system.GPU().MemoryManager()}; 619 auto& memory_manager{system.GPU().MemoryManager()};
606 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 620 const GPUVAddr address{GetShaderAddress(system, program)};
607 621
608 // Look up shader in the cache based on address 622 // Look up shader in the cache based on address
609 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 623 const auto host_ptr{memory_manager.GetPointer(address)};
610 Shader shader{TryGet(host_ptr)}; 624 Shader shader{TryGet(host_ptr)};
611 if (shader) { 625 if (shader) {
612 return last_shaders[static_cast<std::size_t>(program)] = shader; 626 return last_shaders[static_cast<std::size_t>(program)] = shader;
613 } 627 }
614 628
615 // No shader found - create a new one 629 // No shader found - create a new one
616 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 630 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
617 ProgramCode program_code_b; 631 ProgramCode code_b;
618 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; 632 if (program == Maxwell::ShaderProgram::VertexA) {
619 if (is_program_a) { 633 const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
620 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 634 code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
621 program_code_b = GetShaderCode(memory_manager, program_addr_b, 635 }
622 memory_manager.GetPointer(program_addr_b)); 636
623 } 637 const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
624 638 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
625 const auto unique_identifier = 639 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
626 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); 640 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
627 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 641 cpu_addr, host_ptr, unique_identifier};
628 const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, 642
629 host_ptr, unique_identifier}; 643 const auto found = unspecialized_shaders.find(unique_identifier);
630 644 if (found == unspecialized_shaders.end()) {
631 const auto found = precompiled_shaders.find(unique_identifier); 645 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
632 if (found == precompiled_shaders.end()) { 646 std::move(code_b));
633 shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
634 std::move(program_code_b));
635 } else { 647 } else {
636 shader = CachedShader::CreateStageFromCache(params, program, found->second); 648 shader = CachedShader::CreateFromCache(params, found->second);
637 } 649 }
638 Register(shader); 650 Register(shader);
639 651
@@ -651,15 +663,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
651 // No kernel found - create a new one 663 // No kernel found - create a new one
652 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 664 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
653 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; 665 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
666 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
654 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 667 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
655 const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, 668 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
656 host_ptr, unique_identifier}; 669 cpu_addr, host_ptr, unique_identifier};
657 670
658 const auto found = precompiled_shaders.find(unique_identifier); 671 const auto found = unspecialized_shaders.find(unique_identifier);
659 if (found == precompiled_shaders.end()) { 672 if (found == unspecialized_shaders.end()) {
660 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 673 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
661 } else { 674 } else {
662 kernel = CachedShader::CreateKernelFromCache(params, found->second); 675 kernel = CachedShader::CreateFromCache(params, found->second);
663 } 676 }
664 677
665 Register(kernel); 678 Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6ff78f005..700a83853 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -8,9 +8,10 @@
8#include <atomic> 8#include <atomic>
9#include <bitset> 9#include <bitset>
10#include <memory> 10#include <memory>
11#include <set> 11#include <string>
12#include <tuple> 12#include <tuple>
13#include <unordered_map> 13#include <unordered_map>
14#include <unordered_set>
14#include <vector> 15#include <vector>
15 16
16#include <glad/glad.h> 17#include <glad/glad.h>
@@ -20,6 +21,7 @@
20#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
21#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24#include "video_core/shader/shader_ir.h"
23 25
24namespace Core { 26namespace Core {
25class System; 27class System;
@@ -40,12 +42,19 @@ using Shader = std::shared_ptr<CachedShader>;
40using CachedProgram = std::shared_ptr<OGLProgram>; 42using CachedProgram = std::shared_ptr<OGLProgram>;
41using Maxwell = Tegra::Engines::Maxwell3D::Regs; 43using Maxwell = Tegra::Engines::Maxwell3D::Regs;
42using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 44using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
43using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; 45using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
46
47struct UnspecializedShader {
48 GLShader::ShaderEntries entries;
49 ProgramType program_type;
50 ProgramCode code;
51 ProgramCode code_b;
52};
44 53
45struct ShaderParameters { 54struct ShaderParameters {
46 ShaderDiskCacheOpenGL& disk_cache;
47 const PrecompiledPrograms& precompiled_programs;
48 Core::System& system; 55 Core::System& system;
56 ShaderDiskCacheOpenGL& disk_cache;
57 const PrecompiledVariants* precompiled_variants;
49 const Device& device; 58 const Device& device;
50 VAddr cpu_addr; 59 VAddr cpu_addr;
51 u8* host_ptr; 60 u8* host_ptr;
@@ -56,23 +65,18 @@ class CachedShader final : public RasterizerCacheObject {
56public: 65public:
57 static Shader CreateStageFromMemory(const ShaderParameters& params, 66 static Shader CreateStageFromMemory(const ShaderParameters& params,
58 Maxwell::ShaderProgram program_type, 67 Maxwell::ShaderProgram program_type,
59 ProgramCode&& program_code, ProgramCode&& program_code_b); 68 ProgramCode program_code, ProgramCode program_code_b);
69 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
60 70
61 static Shader CreateStageFromCache(const ShaderParameters& params, 71 static Shader CreateFromCache(const ShaderParameters& params,
62 Maxwell::ShaderProgram program_type, 72 const UnspecializedShader& unspecialized);
63 GLShader::ProgramResult result);
64
65 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
66
67 static Shader CreateKernelFromCache(const ShaderParameters& params,
68 GLShader::ProgramResult result);
69 73
70 VAddr GetCpuAddr() const override { 74 VAddr GetCpuAddr() const override {
71 return cpu_addr; 75 return cpu_addr;
72 } 76 }
73 77
74 std::size_t GetSizeInBytes() const override { 78 std::size_t GetSizeInBytes() const override {
75 return shader_length; 79 return program_code.size() * sizeof(u64);
76 } 80 }
77 81
78 /// Gets the shader entries for the shader 82 /// Gets the shader entries for the shader
@@ -85,21 +89,24 @@ public:
85 89
86private: 90private:
87 explicit CachedShader(const ShaderParameters& params, ProgramType program_type, 91 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
88 GLShader::ProgramResult result); 92 GLShader::ShaderEntries entries, ProgramCode program_code,
89 93 ProgramCode program_code_b);
90 CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
91 94
92 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; 95 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
93 96
97 Core::System& system;
98 ShaderDiskCacheOpenGL& disk_cache;
99 const Device& device;
100
94 VAddr cpu_addr{}; 101 VAddr cpu_addr{};
102
95 u64 unique_identifier{}; 103 u64 unique_identifier{};
96 ProgramType program_type{}; 104 ProgramType program_type{};
97 ShaderDiskCacheOpenGL& disk_cache;
98 const PrecompiledPrograms& precompiled_programs;
99 105
100 GLShader::ShaderEntries entries; 106 GLShader::ShaderEntries entries;
101 std::string code; 107
102 std::size_t shader_length{}; 108 ProgramCode program_code;
109 ProgramCode program_code_b;
103 110
104 std::unordered_map<ProgramVariant, CachedProgram> programs; 111 std::unordered_map<ProgramVariant, CachedProgram> programs;
105}; 112};
@@ -124,21 +131,26 @@ protected:
124 void FlushObjectInner(const Shader& object) override {} 131 void FlushObjectInner(const Shader& object) override {}
125 132
126private: 133private:
127 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( 134 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
128 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 135 const VideoCore::DiskResourceLoadCallback& callback,
129 const std::vector<ShaderDiskCacheRaw>& raws, 136 const std::vector<ShaderDiskCacheRaw>& raws);
130 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
131 137
132 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, 138 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
133 const std::set<GLenum>& supported_formats); 139 const std::unordered_set<GLenum>& supported_formats);
140
141 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
134 142
135 Core::System& system; 143 Core::System& system;
136 Core::Frontend::EmuWindow& emu_window; 144 Core::Frontend::EmuWindow& emu_window;
137 const Device& device; 145 const Device& device;
146
138 ShaderDiskCacheOpenGL disk_cache; 147 ShaderDiskCacheOpenGL disk_cache;
139 148
140 PrecompiledShaders precompiled_shaders;
141 PrecompiledPrograms precompiled_programs; 149 PrecompiledPrograms precompiled_programs;
150 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
151
152 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
153
142 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 154 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
143}; 155};
144 156
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 71d7389cb..030550c53 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -415,27 +415,6 @@ public:
415 return code.GetResult(); 415 return code.GetResult();
416 } 416 }
417 417
418 ShaderEntries GetShaderEntries() const {
419 ShaderEntries entries;
420 for (const auto& cbuf : ir.GetConstantBuffers()) {
421 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
422 cbuf.first);
423 }
424 for (const auto& sampler : ir.GetSamplers()) {
425 entries.samplers.emplace_back(sampler);
426 }
427 for (const auto& [offset, image] : ir.GetImages()) {
428 entries.images.emplace_back(image);
429 }
430 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
431 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
432 usage.is_read, usage.is_written);
433 }
434 entries.clip_distances = ir.GetClipDistances();
435 entries.shader_length = ir.GetLength();
436 return entries;
437 }
438
439private: 418private:
440 friend class ASTDecompiler; 419 friend class ASTDecompiler;
441 friend class ExprDecompiler; 420 friend class ExprDecompiler;
@@ -2481,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() {
2481 2460
2482} // Anonymous namespace 2461} // Anonymous namespace
2483 2462
2463ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2464 ShaderEntries entries;
2465 for (const auto& cbuf : ir.GetConstantBuffers()) {
2466 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2467 cbuf.first);
2468 }
2469 for (const auto& sampler : ir.GetSamplers()) {
2470 entries.samplers.emplace_back(sampler);
2471 }
2472 for (const auto& [offset, image] : ir.GetImages()) {
2473 entries.images.emplace_back(image);
2474 }
2475 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2476 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2477 usage.is_written);
2478 }
2479 entries.clip_distances = ir.GetClipDistances();
2480 entries.shader_length = ir.GetLength();
2481 return entries;
2482}
2483
2484std::string GetCommonDeclarations() { 2484std::string GetCommonDeclarations() {
2485 return fmt::format( 2485 return R"(#define ftoi floatBitsToInt
2486 "#define ftoi floatBitsToInt\n" 2486#define ftou floatBitsToUint
2487 "#define ftou floatBitsToUint\n" 2487#define itof intBitsToFloat
2488 "#define itof intBitsToFloat\n" 2488#define utof uintBitsToFloat
2489 "#define utof uintBitsToFloat\n\n" 2489
2490 "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" 2490bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2491 " bvec2 is_nan1 = isnan(pair1);\n" 2491 bvec2 is_nan1 = isnan(pair1);
2492 " bvec2 is_nan2 = isnan(pair2);\n" 2492 bvec2 is_nan2 = isnan(pair2);
2493 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " 2493 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2494 "is_nan2.y);\n" 2494}
2495 "}}\n\n"); 2495)";
2496} 2496}
2497 2497
2498ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, 2498std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
2499 const std::string& suffix) { 2499 const std::string& suffix) {
2500 GLSLDecompiler decompiler(device, ir, stage, suffix); 2500 GLSLDecompiler decompiler(device, ir, stage, suffix);
2501 decompiler.Decompile(); 2501 decompiler.Decompile();
2502 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 2502 return decompiler.GetResult();
2503} 2503}
2504 2504
2505} // namespace OpenGL::GLShader 2505} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e538dc001..fead2a51e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
34 34
35namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
36 36
37struct ShaderEntries;
38
39using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
40using ProgramResult = std::pair<std::string, ShaderEntries>;
41using SamplerEntry = VideoCommon::Shader::Sampler; 38using SamplerEntry = VideoCommon::Shader::Sampler;
42using ImageEntry = VideoCommon::Shader::Image; 39using ImageEntry = VideoCommon::Shader::Image;
43 40
@@ -93,9 +90,11 @@ struct ShaderEntries {
93 std::size_t shader_length{}; 90 std::size_t shader_length{};
94}; 91};
95 92
93ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
94
96std::string GetCommonDeclarations(); 95std::string GetCommonDeclarations();
97 96
98ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 97std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
99 ProgramType stage, const std::string& suffix); 98 ProgramType stage, const std::string& suffix);
100 99
101} // namespace OpenGL::GLShader 100} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 74cc33476..ddc19dccd 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,12 +29,7 @@ enum class TransferableEntryKind : u32 {
29 Usage, 29 Usage,
30}; 30};
31 31
32enum class PrecompiledEntryKind : u32 { 32constexpr u32 NativeVersion = 5;
33 Decompiled,
34 Dump,
35};
36
37constexpr u32 NativeVersion = 4;
38 33
39// Making sure sizes doesn't change by accident 34// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 16); 35static_assert(sizeof(BaseBindings) == 16);
@@ -49,13 +44,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
49 return hash; 44 return hash;
50} 45}
51 46
52} // namespace 47} // Anonymous namespace
53 48
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 49ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 50 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 51 : unique_identifier{unique_identifier}, program_type{program_type},
58 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
59 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} 52 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
60 53
61ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 54ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +83,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
90bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 83bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
91 if (file.WriteObject(unique_identifier) != 1 || 84 if (file.WriteObject(unique_identifier) != 1 ||
92 file.WriteObject(static_cast<u32>(program_type)) != 1 || 85 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
93 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { 86 file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
87 file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
94 return false; 88 return false;
95 } 89 }
96 90
97 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) 91 if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
98 return false; 92 return false;
99 93
100 if (HasProgramA() && 94 if (HasProgramA() &&
101 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { 95 file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
102 return false; 96 return false;
103 } 97 }
104 return true; 98 return true;
@@ -186,13 +180,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
186 return {{std::move(raws), std::move(usages)}}; 180 return {{std::move(raws), std::move(usages)}};
187} 181}
188 182
189std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> 183std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
190ShaderDiskCacheOpenGL::LoadPrecompiled() { 184ShaderDiskCacheOpenGL::LoadPrecompiled() {
191 if (!is_usable) { 185 if (!is_usable) {
192 return {}; 186 return {};
193 } 187 }
194 188
195 FileUtil::IOFile file(GetPrecompiledPath(), "rb"); 189 std::string path = GetPrecompiledPath();
190 FileUtil::IOFile file(path, "rb");
196 if (!file.IsOpen()) { 191 if (!file.IsOpen()) {
197 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 192 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
198 GetTitleID()); 193 GetTitleID());
@@ -211,7 +206,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
211 return *result; 206 return *result;
212} 207}
213 208
214std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> 209std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
215ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 210ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
216 // Read compressed file from disk and decompress to virtual precompiled cache file 211 // Read compressed file from disk and decompress to virtual precompiled cache file
217 std::vector<u8> compressed(file.GetSize()); 212 std::vector<u8> compressed(file.GetSize());
@@ -231,238 +226,31 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
231 return {}; 226 return {};
232 } 227 }
233 228
234 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
235 ShaderDumpsMap dumps; 229 ShaderDumpsMap dumps;
236 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 230 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
237 PrecompiledEntryKind kind{}; 231 ShaderDiskCacheUsage usage;
238 if (!LoadObjectFromPrecompiled(kind)) { 232 if (!LoadObjectFromPrecompiled(usage)) {
239 return {}; 233 return {};
240 } 234 }
241 235
242 switch (kind) { 236 ShaderDiskCacheDump dump;
243 case PrecompiledEntryKind::Decompiled: { 237 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
244 u64 unique_identifier{};
245 if (!LoadObjectFromPrecompiled(unique_identifier)) {
246 return {};
247 }
248
249 auto entry = LoadDecompiledEntry();
250 if (!entry) {
251 return {};
252 }
253 decompiled.insert({unique_identifier, std::move(*entry)});
254 break;
255 }
256 case PrecompiledEntryKind::Dump: {
257 ShaderDiskCacheUsage usage;
258 if (!LoadObjectFromPrecompiled(usage)) {
259 return {};
260 }
261
262 ShaderDiskCacheDump dump;
263 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
264 return {};
265 }
266
267 u32 binary_length{};
268 if (!LoadObjectFromPrecompiled(binary_length)) {
269 return {};
270 }
271
272 dump.binary.resize(binary_length);
273 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
274 return {};
275 }
276
277 dumps.insert({usage, dump});
278 break;
279 }
280 default:
281 return {}; 238 return {};
282 } 239 }
283 }
284 return {{decompiled, dumps}};
285}
286 240
287std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { 241 u32 binary_length{};
288 u32 code_size{}; 242 if (!LoadObjectFromPrecompiled(binary_length)) {
289 if (!LoadObjectFromPrecompiled(code_size)) {
290 return {};
291 }
292
293 std::string code(code_size, '\0');
294 if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
295 return {};
296 }
297
298 ShaderDiskCacheDecompiled entry;
299 entry.code = std::move(code);
300
301 u32 const_buffers_count{};
302 if (!LoadObjectFromPrecompiled(const_buffers_count)) {
303 return {};
304 }
305
306 for (u32 i = 0; i < const_buffers_count; ++i) {
307 u32 max_offset{};
308 u32 index{};
309 bool is_indirect{};
310 if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
311 !LoadObjectFromPrecompiled(is_indirect)) {
312 return {}; 243 return {};
313 } 244 }
314 entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
315 }
316 245
317 u32 samplers_count{}; 246 dump.binary.resize(binary_length);
318 if (!LoadObjectFromPrecompiled(samplers_count)) { 247 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
319 return {};
320 }
321
322 for (u32 i = 0; i < samplers_count; ++i) {
323 u64 offset{};
324 u64 index{};
325 u32 type{};
326 bool is_array{};
327 bool is_shadow{};
328 bool is_bindless{};
329 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
330 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
331 !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
332 return {}; 248 return {};
333 } 249 }
334 entry.entries.samplers.emplace_back(
335 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
336 static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
337 }
338 250
339 u32 images_count{}; 251 dumps.emplace(usage, dump);
340 if (!LoadObjectFromPrecompiled(images_count)) {
341 return {};
342 }
343 for (u32 i = 0; i < images_count; ++i) {
344 u64 offset{};
345 u64 index{};
346 u32 type{};
347 u8 is_bindless{};
348 u8 is_written{};
349 u8 is_read{};
350 u8 is_atomic{};
351 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
352 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
353 !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
354 !LoadObjectFromPrecompiled(is_atomic)) {
355 return {};
356 }
357 entry.entries.images.emplace_back(
358 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
359 static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
360 is_read != 0, is_atomic != 0);
361 }
362
363 u32 global_memory_count{};
364 if (!LoadObjectFromPrecompiled(global_memory_count)) {
365 return {};
366 } 252 }
367 for (u32 i = 0; i < global_memory_count; ++i) { 253 return dumps;
368 u32 cbuf_index{};
369 u32 cbuf_offset{};
370 bool is_read{};
371 bool is_written{};
372 if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
373 !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
374 return {};
375 }
376 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
377 is_written);
378 }
379
380 for (auto& clip_distance : entry.entries.clip_distances) {
381 if (!LoadObjectFromPrecompiled(clip_distance)) {
382 return {};
383 }
384 }
385
386 u64 shader_length{};
387 if (!LoadObjectFromPrecompiled(shader_length)) {
388 return {};
389 }
390 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
391
392 return entry;
393}
394
395bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
396 const GLShader::ShaderEntries& entries) {
397 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
398 !SaveObjectToPrecompiled(unique_identifier) ||
399 !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
400 !SaveArrayToPrecompiled(code.data(), code.size())) {
401 return false;
402 }
403
404 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
405 return false;
406 }
407 for (const auto& cbuf : entries.const_buffers) {
408 if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
409 !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
410 !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
411 return false;
412 }
413 }
414
415 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
416 return false;
417 }
418 for (const auto& sampler : entries.samplers) {
419 if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
420 !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
421 !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
422 !SaveObjectToPrecompiled(sampler.IsArray()) ||
423 !SaveObjectToPrecompiled(sampler.IsShadow()) ||
424 !SaveObjectToPrecompiled(sampler.IsBindless())) {
425 return false;
426 }
427 }
428
429 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
430 return false;
431 }
432 for (const auto& image : entries.images) {
433 if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
434 !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
435 !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
436 !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
437 !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
438 !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
439 !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
440 return false;
441 }
442 }
443
444 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
445 return false;
446 }
447 for (const auto& gmem : entries.global_memory_entries) {
448 if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
449 !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
450 !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
451 return false;
452 }
453 }
454
455 for (const bool clip_distance : entries.clip_distances) {
456 if (!SaveObjectToPrecompiled(clip_distance)) {
457 return false;
458 }
459 }
460
461 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
462 return false;
463 }
464
465 return true;
466} 254}
467 255
468void ShaderDiskCacheOpenGL::InvalidateTransferable() { 256void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -532,28 +320,18 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
532 } 320 }
533} 321}
534 322
535void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, 323void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
536 const GLShader::ShaderEntries& entries) {
537 if (!is_usable) { 324 if (!is_usable) {
538 return; 325 return;
539 } 326 }
540 327
328 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
329 // when writing the dump. This should be done the moment I get access to write to the virtual
330 // file.
541 if (precompiled_cache_virtual_file.GetSize() == 0) { 331 if (precompiled_cache_virtual_file.GetSize() == 0) {
542 SavePrecompiledHeaderToVirtualPrecompiledCache(); 332 SavePrecompiledHeaderToVirtualPrecompiledCache();
543 } 333 }
544 334
545 if (!SaveDecompiledFile(unique_identifier, code, entries)) {
546 LOG_ERROR(Render_OpenGL,
547 "Failed to save decompiled entry to the precompiled file - removing");
548 InvalidatePrecompiled();
549 }
550}
551
552void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
553 if (!is_usable) {
554 return;
555 }
556
557 GLint binary_length{}; 335 GLint binary_length{};
558 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 336 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
559 337
@@ -561,8 +339,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
561 std::vector<u8> binary(binary_length); 339 std::vector<u8> binary(binary_length);
562 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 340 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
563 341
564 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || 342 if (!SaveObjectToPrecompiled(usage) ||
565 !SaveObjectToPrecompiled(usage) ||
566 !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || 343 !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
567 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || 344 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
568 !SaveArrayToPrecompiled(binary.data(), binary.size())) { 345 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
@@ -574,8 +351,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
574} 351}
575 352
576FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { 353FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
577 if (!EnsureDirectories()) 354 if (!EnsureDirectories()) {
578 return {}; 355 return {};
356 }
579 357
580 const auto transferable_path{GetTransferablePath()}; 358 const auto transferable_path{GetTransferablePath()};
581 const bool existed = FileUtil::Exists(transferable_path); 359 const bool existed = FileUtil::Exists(transferable_path);
@@ -607,8 +385,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
607 385
608void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { 386void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
609 precompiled_cache_virtual_file_offset = 0; 387 precompiled_cache_virtual_file_offset = 0;
610 const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); 388 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
611 const std::vector<u8>& compressed = 389 const std::vector<u8> compressed =
612 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); 390 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
613 391
614 const auto precompiled_path{GetPrecompiledPath()}; 392 const auto precompiled_path{GetPrecompiledPath()};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 9595bd71b..61b46d728 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -123,8 +123,7 @@ namespace OpenGL {
123class ShaderDiskCacheRaw { 123class ShaderDiskCacheRaw {
124public: 124public:
125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
126 u32 program_code_size, u32 program_code_size_b, 126 ProgramCode program_code, ProgramCode program_code_b = {});
127 ProgramCode program_code, ProgramCode program_code_b);
128 ShaderDiskCacheRaw(); 127 ShaderDiskCacheRaw();
129 ~ShaderDiskCacheRaw(); 128 ~ShaderDiskCacheRaw();
130 129
@@ -155,22 +154,14 @@ public:
155private: 154private:
156 u64 unique_identifier{}; 155 u64 unique_identifier{};
157 ProgramType program_type{}; 156 ProgramType program_type{};
158 u32 program_code_size{};
159 u32 program_code_size_b{};
160 157
161 ProgramCode program_code; 158 ProgramCode program_code;
162 ProgramCode program_code_b; 159 ProgramCode program_code_b;
163}; 160};
164 161
165/// Contains decompiled data from a shader
166struct ShaderDiskCacheDecompiled {
167 std::string code;
168 GLShader::ShaderEntries entries;
169};
170
171/// Contains an OpenGL dumped binary program 162/// Contains an OpenGL dumped binary program
172struct ShaderDiskCacheDump { 163struct ShaderDiskCacheDump {
173 GLenum binary_format; 164 GLenum binary_format{};
174 std::vector<u8> binary; 165 std::vector<u8> binary;
175}; 166};
176 167
@@ -184,9 +175,7 @@ public:
184 LoadTransferable(); 175 LoadTransferable();
185 176
186 /// Loads current game's precompiled cache. Invalidates on failure. 177 /// Loads current game's precompiled cache. Invalidates on failure.
187 std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 178 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
188 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
189 LoadPrecompiled();
190 179
191 /// Removes the transferable (and precompiled) cache file. 180 /// Removes the transferable (and precompiled) cache file.
192 void InvalidateTransferable(); 181 void InvalidateTransferable();
@@ -200,10 +189,6 @@ public:
200 /// Saves shader usage to the transferable file. Does not check for collisions. 189 /// Saves shader usage to the transferable file. Does not check for collisions.
201 void SaveUsage(const ShaderDiskCacheUsage& usage); 190 void SaveUsage(const ShaderDiskCacheUsage& usage);
202 191
203 /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
204 void SaveDecompiled(u64 unique_identifier, const std::string& code,
205 const GLShader::ShaderEntries& entries);
206
207 /// Saves a dump entry to the precompiled file. Does not check for collisions. 192 /// Saves a dump entry to the precompiled file. Does not check for collisions.
208 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 193 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
209 194
@@ -212,18 +197,9 @@ public:
212 197
213private: 198private:
214 /// Loads the transferable cache. Returns empty on failure. 199 /// Loads the transferable cache. Returns empty on failure.
215 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 200 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
216 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
217 LoadPrecompiledFile(FileUtil::IOFile& file); 201 LoadPrecompiledFile(FileUtil::IOFile& file);
218 202
219 /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
220 /// failure.
221 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
222
223 /// Saves a decompiled entry to the passed file. Returns true on success.
224 bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
225 const GLShader::ShaderEntries& entries);
226
227 /// Opens current game's transferable file and write it's header if it doesn't exist 203 /// Opens current game's transferable file and write it's header if it doesn't exist
228 FileUtil::IOFile AppendTransferableFile() const; 204 FileUtil::IOFile AppendTransferableFile() const;
229 205
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 817c6e12c..0e22eede9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -16,18 +16,8 @@ using VideoCommon::Shader::CompilerSettings;
16using VideoCommon::Shader::ProgramCode; 16using VideoCommon::Shader::ProgramCode;
17using VideoCommon::Shader::ShaderIR; 17using VideoCommon::Shader::ShaderIR;
18 18
19static constexpr u32 PROGRAM_OFFSET = 10; 19std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
20static constexpr u32 COMPUTE_OFFSET = 0; 20 std::string out = GetCommonDeclarations();
21
22static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
23
24ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
25 const ShaderSetup& setup) {
26 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
27
28 std::string out = "// Shader Unique Id: VS" + id + "\n\n";
29 out += GetCommonDeclarations();
30
31 out += R"( 21 out += R"(
32layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { 22layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
33 vec4 viewport_flip; 23 vec4 viewport_flip;
@@ -35,18 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
35}; 25};
36 26
37)"; 27)";
38 28 const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
39 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, 29 out += Decompile(device, ir, stage, "vertex");
40 locker); 30 if (ir_b) {
41 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; 31 out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
42 ProgramResult program = Decompile(device, program_ir, stage, "vertex");
43 out += program.first;
44
45 if (setup.IsDualProgram()) {
46 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
47 settings, locker);
48 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
49 out += program_b.first;
50 } 32 }
51 33
52 out += R"( 34 out += R"(
@@ -54,7 +36,7 @@ void main() {
54 execute_vertex(); 36 execute_vertex();
55)"; 37)";
56 38
57 if (setup.IsDualProgram()) { 39 if (ir_b) {
58 out += " execute_vertex_b();"; 40 out += " execute_vertex_b();";
59 } 41 }
60 42
@@ -68,18 +50,13 @@ void main() {
68 // Viewport can be flipped, which is unsupported by glViewport 50 // Viewport can be flipped, which is unsupported by glViewport
69 gl_Position.xy *= viewport_flip.xy; 51 gl_Position.xy *= viewport_flip.xy;
70 } 52 }
71})"; 53}
72 54)";
73 return {std::move(out), std::move(program.second)}; 55 return out;
74} 56}
75 57
76ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, 58std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
77 const ShaderSetup& setup) { 59 std::string out = GetCommonDeclarations();
78 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
79
80 std::string out = "// Shader Unique Id: GS" + id + "\n\n";
81 out += GetCommonDeclarations();
82
83 out += R"( 60 out += R"(
84layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { 61layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
85 vec4 viewport_flip; 62 vec4 viewport_flip;
@@ -87,27 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
87}; 64};
88 65
89)"; 66)";
90 67 out += Decompile(device, ir, ProgramType::Geometry, "geometry");
91 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
92 locker);
93 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
94 out += program.first;
95 68
96 out += R"( 69 out += R"(
97void main() { 70void main() {
98 execute_geometry(); 71 execute_geometry();
99};)"; 72}
100 73)";
101 return {std::move(out), std::move(program.second)}; 74 return out;
102} 75}
103 76
104ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, 77std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
105 const ShaderSetup& setup) { 78 std::string out = GetCommonDeclarations();
106 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
107
108 std::string out = "// Shader Unique Id: FS" + id + "\n\n";
109 out += GetCommonDeclarations();
110
111 out += R"( 79 out += R"(
112layout (location = 0) out vec4 FragColor0; 80layout (location = 0) out vec4 FragColor0;
113layout (location = 1) out vec4 FragColor1; 81layout (location = 1) out vec4 FragColor1;
@@ -124,39 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
124}; 92};
125 93
126)"; 94)";
127 95 out += Decompile(device, ir, ProgramType::Fragment, "fragment");
128 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
129 locker);
130 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
131 out += program.first;
132 96
133 out += R"( 97 out += R"(
134void main() { 98void main() {
135 execute_fragment(); 99 execute_fragment();
136} 100}
137
138)"; 101)";
139 return {std::move(out), std::move(program.second)}; 102 return out;
140} 103}
141 104
142ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, 105std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
143 const ShaderSetup& setup) { 106 std::string out = GetCommonDeclarations();
144 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 107 out += Decompile(device, ir, ProgramType::Compute, "compute");
145
146 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
147 out += GetCommonDeclarations();
148
149 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings,
150 locker);
151 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
152 out += program.first;
153
154 out += R"( 108 out += R"(
155void main() { 109void main() {
156 execute_compute(); 110 execute_compute();
157} 111}
158)"; 112)";
159 return {std::move(out), std::move(program.second)}; 113 return out;
160} 114}
161 115
162} // namespace OpenGL::GLShader 116} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 05f157298..cba2be9f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -16,50 +16,19 @@ class Device;
16 16
17namespace OpenGL::GLShader { 17namespace OpenGL::GLShader {
18 18
19using VideoCommon::Shader::ConstBufferLocker;
20using VideoCommon::Shader::ProgramCode; 19using VideoCommon::Shader::ProgramCode;
21 20using VideoCommon::Shader::ShaderIR;
22struct ShaderSetup {
23 explicit ShaderSetup(ProgramCode program_code) {
24 program.code = std::move(program_code);
25 }
26
27 struct {
28 ProgramCode code;
29 ProgramCode code_b; // Used for dual vertex shaders
30 u64 unique_identifier;
31 std::size_t size_a;
32 std::size_t size_b;
33 } program;
34
35 /// Used in scenarios where we have a dual vertex shaders
36 void SetProgramB(ProgramCode program_b) {
37 program.code_b = std::move(program_b);
38 has_program_b = true;
39 }
40
41 bool IsDualProgram() const {
42 return has_program_b;
43 }
44
45private:
46 bool has_program_b{};
47};
48 21
49/// Generates the GLSL vertex shader program source code for the given VS program 22/// Generates the GLSL vertex shader program source code for the given VS program
50ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, 23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
51 const ShaderSetup& setup);
52 24
53/// Generates the GLSL geometry shader program source code for the given GS program 25/// Generates the GLSL geometry shader program source code for the given GS program
54ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, 26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
55 const ShaderSetup& setup);
56 27
57/// Generates the GLSL fragment shader program source code for the given FS program 28/// Generates the GLSL fragment shader program source code for the given FS program
58ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, 29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
59 const ShaderSetup& setup);
60 30
61/// Generates the GLSL compute shader program source code for the given CS program 31/// Generates the GLSL compute shader program source code for the given CS program
62ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, 32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
63 const ShaderSetup& setup);
64 33
65} // namespace OpenGL::GLShader 34} // namespace OpenGL::GLShader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index 9d23bcecf..37a0968a1 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -15,15 +15,15 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
15 : engine{nullptr}, shader_stage{shader_stage} {} 15 : engine{nullptr}, shader_stage{shader_stage} {}
16 16
17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, 17ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
18 Tegra::Engines::ConstBufferEngineInterface* engine) 18 Tegra::Engines::ConstBufferEngineInterface& engine)
19 : engine{engine}, shader_stage{shader_stage} {} 19 : engine{&engine}, shader_stage{shader_stage} {}
20 20
21bool ConstBufferLocker::IsEngineSet() const { 21bool ConstBufferLocker::IsEngineSet() const {
22 return engine != nullptr; 22 return engine != nullptr;
23} 23}
24 24
25void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { 25void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) {
26 engine = engine_; 26 engine = &engine_;
27} 27}
28 28
29std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { 29std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index 13eeba320..54459977f 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -21,14 +21,14 @@ public:
21 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); 21 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
22 22
23 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, 23 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
24 Tegra::Engines::ConstBufferEngineInterface* engine); 24 Tegra::Engines::ConstBufferEngineInterface& engine);
25 25
26 // Checks if an engine is setup, it may be possible that during disk shader 26 // Checks if an engine is setup, it may be possible that during disk shader
27 // cache run, the engines have not been created yet. 27 // cache run, the engines have not been created yet.
28 bool IsEngineSet() const; 28 bool IsEngineSet() const;
29 29
30 // Use this to set/change the engine used for this shader. 30 // Use this to set/change the engine used for this shader.
31 void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); 31 void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine);
32 32
33 // Retrieves a key from the locker, if it's registered, it will give the 33 // Retrieves a key from the locker, if it's registered, it will give the
34 // registered value, if not it will obtain it from maxwell3d and register it. 34 // registered value, if not it will obtain it from maxwell3d and register it.
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index d1c269ea7..6c698bcff 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -66,10 +66,11 @@ struct BlockInfo {
66}; 66};
67 67
68struct CFGRebuildState { 68struct CFGRebuildState {
69 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, 69 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
70 const u32 start, ConstBufferLocker& locker) 70 : program_code{program_code}, start{start}, locker{locker} {}
71 : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {}
72 71
72 const ProgramCode& program_code;
73 ConstBufferLocker& locker;
73 u32 start{}; 74 u32 start{};
74 std::vector<BlockInfo> block_info{}; 75 std::vector<BlockInfo> block_info{};
75 std::list<u32> inspect_queries{}; 76 std::list<u32> inspect_queries{};
@@ -79,10 +80,7 @@ struct CFGRebuildState {
79 std::map<u32, u32> ssy_labels{}; 80 std::map<u32, u32> ssy_labels{};
80 std::map<u32, u32> pbk_labels{}; 81 std::map<u32, u32> pbk_labels{};
81 std::unordered_map<u32, BlockStack> stacks{}; 82 std::unordered_map<u32, BlockStack> stacks{};
82 const ProgramCode& program_code;
83 const std::size_t program_size;
84 ASTManager* manager; 83 ASTManager* manager;
85 ConstBufferLocker& locker;
86}; 84};
87 85
88enum class BlockCollision : u32 { None, Found, Inside }; 86enum class BlockCollision : u32 { None, Found, Inside };
@@ -242,7 +240,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
242 240
243std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { 241std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
244 u32 offset = static_cast<u32>(address); 242 u32 offset = static_cast<u32>(address);
245 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); 243 const u32 end_address = static_cast<u32>(state.program_code.size());
246 ParseInfo parse_info{}; 244 ParseInfo parse_info{};
247 SingleBranch single_branch{}; 245 SingleBranch single_branch{};
248 246
@@ -583,6 +581,7 @@ bool TryQuery(CFGRebuildState& state) {
583 } 581 }
584 return true; 582 return true;
585} 583}
584
586} // Anonymous namespace 585} // Anonymous namespace
587 586
588void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { 587void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
@@ -651,8 +650,7 @@ void DecompileShader(CFGRebuildState& state) {
651 state.manager->Decompile(); 650 state.manager->Decompile();
652} 651}
653 652
654std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 653std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
655 std::size_t program_size, u32 start_address,
656 const CompilerSettings& settings, 654 const CompilerSettings& settings,
657 ConstBufferLocker& locker) { 655 ConstBufferLocker& locker) {
658 auto result_out = std::make_unique<ShaderCharacteristics>(); 656 auto result_out = std::make_unique<ShaderCharacteristics>();
@@ -661,7 +659,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
661 return result_out; 659 return result_out;
662 } 660 }
663 661
664 CFGRebuildState state{program_code, program_size, start_address, locker}; 662 CFGRebuildState state{program_code, start_address, locker};
665 // Inspect Code and generate blocks 663 // Inspect Code and generate blocks
666 state.labels.clear(); 664 state.labels.clear();
667 state.labels.emplace(start_address); 665 state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 369ca255b..288ee68af 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -105,8 +105,7 @@ struct ShaderCharacteristics {
105 CompilerSettings settings{}; 105 CompilerSettings settings{};
106}; 106};
107 107
108std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, 108std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
109 std::size_t program_size, u32 start_address,
110 const CompilerSettings& settings, 109 const CompilerSettings& settings,
111 ConstBufferLocker& locker); 110 ConstBufferLocker& locker);
112 111
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 053241128..e1afa4582 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
33 return (absolute_offset % SchedPeriod) == 0; 33 return (absolute_offset % SchedPeriod) == 0;
34} 34}
35 35
36} // namespace 36} // Anonymous namespace
37 37
38class ASTDecoder { 38class ASTDecoder {
39public: 39public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
103 103
104 decompiled = false; 104 decompiled = false;
105 auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); 105 auto info = ScanFlow(program_code, main_offset, settings, locker);
106 auto& shader_info = *info; 106 auto& shader_info = *info;
107 coverage_begin = shader_info.start; 107 coverage_begin = shader_info.start;
108 coverage_end = shader_info.end; 108 coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
155 [[fallthrough]]; 155 [[fallthrough]];
156 case CompileDepth::BruteForce: { 156 case CompileDepth::BruteForce: {
157 coverage_begin = main_offset; 157 coverage_begin = main_offset;
158 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); 158 const u32 shader_end = program_code.size();
159 coverage_end = shader_end; 159 coverage_end = shader_end;
160 for (u32 label = main_offset; label < shader_end; label++) { 160 for (u32 label = main_offset; label < shader_end; label++) {
161 basic_blocks.insert({label, DecodeRange(label, label + 1)}); 161 basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -225,7 +225,8 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
225 for (auto& branch_case : multi_branch->branches) { 225 for (auto& branch_case : multi_branch->branches) {
226 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); 226 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
227 Node op_b = Immediate(branch_case.cmp_value); 227 Node op_b = Immediate(branch_case.cmp_value);
228 Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); 228 Node condition =
229 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
229 auto result = Conditional(condition, {n}); 230 auto result = Conditional(condition, {n});
230 bb.push_back(result); 231 bb.push_back(result);
231 global_code.push_back(result); 232 global_code.push_back(result);
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 6430575ec..1d718ccc6 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -22,10 +22,9 @@ using Tegra::Shader::PredCondition;
22using Tegra::Shader::PredOperation; 22using Tegra::Shader::PredOperation;
23using Tegra::Shader::Register; 23using Tegra::Shader::Register;
24 24
25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, 25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
26 CompilerSettings settings, ConstBufferLocker& locker) 26 ConstBufferLocker& locker)
27 : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, 27 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
28 program_manager{true, true}, settings{settings}, locker{locker} {
29 Decode(); 28 Decode();
30} 29}
31 30
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 3a3e381d2..3ebea91b9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -67,8 +67,8 @@ struct GlobalMemoryUsage {
67 67
68class ShaderIR final { 68class ShaderIR final {
69public: 69public:
70 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, 70 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
71 CompilerSettings settings, ConstBufferLocker& locker); 71 ConstBufferLocker& locker);
72 ~ShaderIR(); 72 ~ShaderIR();
73 73
74 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 74 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -384,7 +384,9 @@ private:
384 384
385 const ProgramCode& program_code; 385 const ProgramCode& program_code;
386 const u32 main_offset; 386 const u32 main_offset;
387 const std::size_t program_size; 387 const CompilerSettings settings;
388 ConstBufferLocker& locker;
389
388 bool decompiled{}; 390 bool decompiled{};
389 bool disable_flow_stack{}; 391 bool disable_flow_stack{};
390 392
@@ -393,9 +395,7 @@ private:
393 395
394 std::map<u32, NodeBlock> basic_blocks; 396 std::map<u32, NodeBlock> basic_blocks;
395 NodeBlock global_code; 397 NodeBlock global_code;
396 ASTManager program_manager; 398 ASTManager program_manager{true, true};
397 CompilerSettings settings{};
398 ConstBufferLocker& locker;
399 399
400 std::set<u32> used_registers; 400 std::set<u32> used_registers;
401 std::set<Tegra::Shader::Pred> used_predicates; 401 std::set<Tegra::Shader::Pred> used_predicates;