diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 489 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 66 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 280 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 32 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 94 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 41 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 4 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/shader/control_flow.h | 3 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 12 |
15 files changed, 420 insertions, 722 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9e2799876..6402d6763 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -3,10 +3,12 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | 5 | #include <mutex> |
| 6 | #include <optional> | ||
| 7 | #include <string> | ||
| 6 | #include <thread> | 8 | #include <thread> |
| 9 | #include <unordered_set> | ||
| 7 | #include <boost/functional/hash.hpp> | 10 | #include <boost/functional/hash.hpp> |
| 8 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 9 | #include "common/hash.h" | ||
| 10 | #include "common/scope_exit.h" | 12 | #include "common/scope_exit.h" |
| 11 | #include "core/core.h" | 13 | #include "core/core.h" |
| 12 | #include "core/frontend/emu_window.h" | 14 | #include "core/frontend/emu_window.h" |
| @@ -22,18 +24,20 @@ | |||
| 22 | 24 | ||
| 23 | namespace OpenGL { | 25 | namespace OpenGL { |
| 24 | 26 | ||
| 27 | using Tegra::Engines::ShaderType; | ||
| 28 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 25 | using VideoCommon::Shader::ProgramCode; | 29 | using VideoCommon::Shader::ProgramCode; |
| 30 | using VideoCommon::Shader::ShaderIR; | ||
| 31 | |||
| 32 | namespace { | ||
| 26 | 33 | ||
| 27 | // One UBO is always reserved for emulation values on staged shaders | 34 | // One UBO is always reserved for emulation values on staged shaders |
| 28 | constexpr u32 STAGE_RESERVED_UBOS = 1; | 35 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 29 | 36 | ||
| 30 | struct UnspecializedShader { | 37 | constexpr u32 STAGE_MAIN_OFFSET = 10; |
| 31 | std::string code; | 38 | constexpr u32 KERNEL_MAIN_OFFSET = 0; |
| 32 | GLShader::ShaderEntries entries; | ||
| 33 | ProgramType program_type; | ||
| 34 | }; | ||
| 35 | 39 | ||
| 36 | namespace { | 40 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; |
| 37 | 41 | ||
| 38 | /// Gets the address for the specified shader stage program | 42 | /// Gets the address for the specified shader stage program |
| 39 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | 43 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { |
| @@ -42,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) | |||
| 42 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | 46 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; |
| 43 | } | 47 | } |
| 44 | 48 | ||
| 49 | /// Gets if the current instruction offset is a scheduler instruction | ||
| 50 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 51 | // Sched instructions appear once every 4 instructions. | ||
| 52 | constexpr std::size_t SchedPeriod = 4; | ||
| 53 | const std::size_t absolute_offset = offset - main_offset; | ||
| 54 | return (absolute_offset % SchedPeriod) == 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Calculates the size of a program stream | ||
| 58 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||
| 59 | constexpr std::size_t start_offset = 10; | ||
| 60 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 61 | // shaders end with one. | ||
| 62 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 63 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 64 | std::size_t offset = start_offset; | ||
| 65 | while (offset < program.size()) { | ||
| 66 | const u64 instruction = program[offset]; | ||
| 67 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 68 | if ((instruction & mask) == self_jumping_branch) { | ||
| 69 | // End on Maxwell's "nop" instruction | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | if (instruction == 0) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | offset++; | ||
| 77 | } | ||
| 78 | // The last instruction is included in the program size | ||
| 79 | return std::min(offset + 1, program.size()); | ||
| 80 | } | ||
| 81 | |||
| 45 | /// Gets the shader program code from memory for the specified address | 82 | /// Gets the shader program code from memory for the specified address |
| 46 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | 83 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, |
| 47 | const u8* host_ptr) { | 84 | const u8* host_ptr) { |
| @@ -52,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 52 | }); | 89 | }); |
| 53 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), | 90 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), |
| 54 | program_code.size() * sizeof(u64)); | 91 | program_code.size() * sizeof(u64)); |
| 92 | program_code.resize(CalculateProgramSize(program_code)); | ||
| 55 | return program_code; | 93 | return program_code; |
| 56 | } | 94 | } |
| 57 | 95 | ||
| @@ -72,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { | |||
| 72 | } | 110 | } |
| 73 | } | 111 | } |
| 74 | 112 | ||
| 75 | /// Gets if the current instruction offset is a scheduler instruction | ||
| 76 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 77 | // Sched instructions appear once every 4 instructions. | ||
| 78 | constexpr std::size_t SchedPeriod = 4; | ||
| 79 | const std::size_t absolute_offset = offset - main_offset; | ||
| 80 | return (absolute_offset % SchedPeriod) == 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | /// Describes primitive behavior on geometry shaders | 113 | /// Describes primitive behavior on geometry shaders |
| 84 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | 114 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { |
| 85 | switch (primitive_mode) { | 115 | switch (primitive_mode) { |
| @@ -122,122 +152,114 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { | |||
| 122 | return {}; | 152 | return {}; |
| 123 | } | 153 | } |
| 124 | 154 | ||
| 125 | /// Calculates the size of a program stream | ||
| 126 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||
| 127 | constexpr std::size_t start_offset = 10; | ||
| 128 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 129 | // shaders end with one. | ||
| 130 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 131 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 132 | std::size_t offset = start_offset; | ||
| 133 | std::size_t size = start_offset * sizeof(u64); | ||
| 134 | while (offset < program.size()) { | ||
| 135 | const u64 instruction = program[offset]; | ||
| 136 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 137 | if ((instruction & mask) == self_jumping_branch) { | ||
| 138 | // End on Maxwell's "nop" instruction | ||
| 139 | break; | ||
| 140 | } | ||
| 141 | if (instruction == 0) { | ||
| 142 | break; | ||
| 143 | } | ||
| 144 | } | ||
| 145 | size += sizeof(u64); | ||
| 146 | offset++; | ||
| 147 | } | ||
| 148 | // The last instruction is included in the program size | ||
| 149 | return std::min(size + sizeof(u64), program.size() * sizeof(u64)); | ||
| 150 | } | ||
| 151 | |||
| 152 | /// Hashes one (or two) program streams | 155 | /// Hashes one (or two) program streams |
| 153 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | 156 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 154 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | 157 | const ProgramCode& code_b) { |
| 155 | if (size_a == 0) { | 158 | u64 unique_identifier = boost::hash_value(code); |
| 156 | size_a = CalculateProgramSize(code); | 159 | if (program_type == ProgramType::VertexA) { |
| 157 | } | 160 | // VertexA programs include two programs |
| 158 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | 161 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); |
| 159 | if (program_type != ProgramType::VertexA) { | ||
| 160 | return unique_identifier; | ||
| 161 | } | ||
| 162 | // VertexA programs include two programs | ||
| 163 | |||
| 164 | std::size_t seed = 0; | ||
| 165 | boost::hash_combine(seed, unique_identifier); | ||
| 166 | |||
| 167 | if (size_b == 0) { | ||
| 168 | size_b = CalculateProgramSize(code_b); | ||
| 169 | } | 162 | } |
| 170 | const u64 identifier_b = | 163 | return unique_identifier; |
| 171 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 172 | boost::hash_combine(seed, identifier_b); | ||
| 173 | return static_cast<u64>(seed); | ||
| 174 | } | 164 | } |
| 175 | 165 | ||
| 176 | /// Creates an unspecialized program from code streams | 166 | /// Creates an unspecialized program from code streams |
| 177 | GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device, | 167 | std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, |
| 178 | ProgramType program_type, ProgramCode program_code, | 168 | const std::optional<ShaderIR>& ir_b) { |
| 179 | ProgramCode program_code_b) { | ||
| 180 | GLShader::ShaderSetup setup(program_code); | ||
| 181 | setup.program.size_a = CalculateProgramSize(program_code); | ||
| 182 | setup.program.size_b = 0; | ||
| 183 | if (program_type == ProgramType::VertexA) { | ||
| 184 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | ||
| 185 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | ||
| 186 | // stage here. | ||
| 187 | setup.SetProgramB(program_code_b); | ||
| 188 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 189 | } | ||
| 190 | setup.program.unique_identifier = GetUniqueIdentifier( | ||
| 191 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | ||
| 192 | |||
| 193 | switch (program_type) { | 169 | switch (program_type) { |
| 194 | case ProgramType::VertexA: | 170 | case ProgramType::VertexA: |
| 195 | case ProgramType::VertexB: { | 171 | case ProgramType::VertexB: |
| 196 | VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex, | 172 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); |
| 197 | &(system.GPU().Maxwell3D())}; | 173 | case ProgramType::Geometry: |
| 198 | return GLShader::GenerateVertexShader(locker, device, setup); | 174 | return GLShader::GenerateGeometryShader(device, ir); |
| 199 | } | 175 | case ProgramType::Fragment: |
| 200 | case ProgramType::Geometry: { | 176 | return GLShader::GenerateFragmentShader(device, ir); |
| 201 | VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry, | 177 | case ProgramType::Compute: |
| 202 | &(system.GPU().Maxwell3D())}; | 178 | return GLShader::GenerateComputeShader(device, ir); |
| 203 | return GLShader::GenerateGeometryShader(locker, device, setup); | ||
| 204 | } | ||
| 205 | case ProgramType::Fragment: { | ||
| 206 | VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment, | ||
| 207 | &(system.GPU().Maxwell3D())}; | ||
| 208 | return GLShader::GenerateFragmentShader(locker, device, setup); | ||
| 209 | } | ||
| 210 | case ProgramType::Compute: { | ||
| 211 | VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute, &(system.GPU().KeplerCompute())}; | ||
| 212 | return GLShader::GenerateComputeShader(locker, device, setup); | ||
| 213 | } | ||
| 214 | default: | 179 | default: |
| 215 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); | 180 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 216 | return {}; | 181 | return {}; |
| 217 | } | 182 | } |
| 218 | } | 183 | } |
| 219 | 184 | ||
| 220 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 185 | constexpr const char* GetProgramTypeName(ProgramType program_type) { |
| 221 | ProgramType program_type, const ProgramVariant& variant, | 186 | switch (program_type) { |
| 222 | bool hint_retrievable = false) { | 187 | case ProgramType::VertexA: |
| 188 | case ProgramType::VertexB: | ||
| 189 | return "VS"; | ||
| 190 | case ProgramType::TessellationControl: | ||
| 191 | return "TCS"; | ||
| 192 | case ProgramType::TessellationEval: | ||
| 193 | return "TES"; | ||
| 194 | case ProgramType::Geometry: | ||
| 195 | return "GS"; | ||
| 196 | case ProgramType::Fragment: | ||
| 197 | return "FS"; | ||
| 198 | case ProgramType::Compute: | ||
| 199 | return "CS"; | ||
| 200 | } | ||
| 201 | return "UNK"; | ||
| 202 | } | ||
| 203 | |||
| 204 | Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { | ||
| 205 | switch (program_type) { | ||
| 206 | case ProgramType::VertexA: | ||
| 207 | case ProgramType::VertexB: | ||
| 208 | return Tegra::Engines::ShaderType::Vertex; | ||
| 209 | case ProgramType::TessellationControl: | ||
| 210 | return Tegra::Engines::ShaderType::TesselationControl; | ||
| 211 | case ProgramType::TessellationEval: | ||
| 212 | return Tegra::Engines::ShaderType::TesselationEval; | ||
| 213 | case ProgramType::Geometry: | ||
| 214 | return Tegra::Engines::ShaderType::Geometry; | ||
| 215 | case ProgramType::Fragment: | ||
| 216 | return Tegra::Engines::ShaderType::Fragment; | ||
| 217 | case ProgramType::Compute: | ||
| 218 | return Tegra::Engines::ShaderType::Compute; | ||
| 219 | } | ||
| 220 | UNREACHABLE(); | ||
| 221 | return {}; | ||
| 222 | } | ||
| 223 | |||
| 224 | std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { | ||
| 225 | return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); | ||
| 226 | } | ||
| 227 | |||
| 228 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, | ||
| 229 | const ProgramCode& program_code, const ProgramCode& program_code_b, | ||
| 230 | const ProgramVariant& variant, ConstBufferLocker& locker, | ||
| 231 | bool hint_retrievable = false) { | ||
| 232 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); | ||
| 233 | |||
| 234 | const bool is_compute = program_type == ProgramType::Compute; | ||
| 235 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 236 | const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); | ||
| 237 | std::optional<ShaderIR> ir_b; | ||
| 238 | if (!program_code_b.empty()) { | ||
| 239 | ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 240 | } | ||
| 241 | const auto entries = GLShader::GetEntries(ir); | ||
| 242 | |||
| 223 | auto base_bindings{variant.base_bindings}; | 243 | auto base_bindings{variant.base_bindings}; |
| 224 | const auto primitive_mode{variant.primitive_mode}; | 244 | const auto primitive_mode{variant.primitive_mode}; |
| 225 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 245 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 226 | 246 | ||
| 227 | std::string source = R"(#version 430 core | 247 | std::string source = fmt::format(R"(// {} |
| 248 | #version 430 core | ||
| 228 | #extension GL_ARB_separate_shader_objects : enable | 249 | #extension GL_ARB_separate_shader_objects : enable |
| 229 | #extension GL_ARB_shader_viewport_layer_array : enable | 250 | #extension GL_ARB_shader_viewport_layer_array : enable |
| 230 | #extension GL_EXT_shader_image_load_formatted : enable | 251 | #extension GL_EXT_shader_image_load_formatted : enable |
| 231 | #extension GL_NV_gpu_shader5 : enable | 252 | #extension GL_NV_gpu_shader5 : enable |
| 232 | #extension GL_NV_shader_thread_group : enable | 253 | #extension GL_NV_shader_thread_group : enable |
| 233 | #extension GL_NV_shader_thread_shuffle : enable | 254 | #extension GL_NV_shader_thread_shuffle : enable |
| 234 | )"; | 255 | )", |
| 235 | if (program_type == ProgramType::Compute) { | 256 | GetShaderId(unique_identifier, program_type)); |
| 257 | if (is_compute) { | ||
| 236 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 258 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 237 | } | 259 | } |
| 238 | source += '\n'; | 260 | source += '\n'; |
| 239 | 261 | ||
| 240 | if (program_type != ProgramType::Compute) { | 262 | if (!is_compute) { |
| 241 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 263 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |
| 242 | } | 264 | } |
| 243 | 265 | ||
| @@ -281,7 +303,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 281 | } | 303 | } |
| 282 | 304 | ||
| 283 | source += '\n'; | 305 | source += '\n'; |
| 284 | source += code; | 306 | source += GenerateGLSL(device, program_type, ir, ir_b); |
| 285 | 307 | ||
| 286 | OGLShader shader; | 308 | OGLShader shader; |
| 287 | shader.Create(source.c_str(), GetShaderType(program_type)); | 309 | shader.Create(source.c_str(), GetShaderType(program_type)); |
| @@ -291,85 +313,86 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 291 | return program; | 313 | return program; |
| 292 | } | 314 | } |
| 293 | 315 | ||
| 294 | std::set<GLenum> GetSupportedFormats() { | 316 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 295 | std::set<GLenum> supported_formats; | ||
| 296 | |||
| 297 | GLint num_formats{}; | 317 | GLint num_formats{}; |
| 298 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 318 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 299 | 319 | ||
| 300 | std::vector<GLint> formats(num_formats); | 320 | std::vector<GLint> formats(num_formats); |
| 301 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | 321 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); |
| 302 | 322 | ||
| 303 | for (const GLint format : formats) | 323 | std::unordered_set<GLenum> supported_formats; |
| 324 | for (const GLint format : formats) { | ||
| 304 | supported_formats.insert(static_cast<GLenum>(format)); | 325 | supported_formats.insert(static_cast<GLenum>(format)); |
| 326 | } | ||
| 305 | return supported_formats; | 327 | return supported_formats; |
| 306 | } | 328 | } |
| 307 | 329 | ||
| 308 | } // Anonymous namespace | 330 | } // Anonymous namespace |
| 309 | 331 | ||
| 310 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | 332 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 311 | GLShader::ProgramResult result) | 333 | GLShader::ShaderEntries entries, ProgramCode program_code, |
| 312 | : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, | 334 | ProgramCode program_code_b) |
| 313 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 335 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, |
| 314 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, | 336 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, |
| 315 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} | 337 | unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, |
| 338 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { | ||
| 339 | if (params.precompiled_variants) { | ||
| 340 | for (const auto& pair : *params.precompiled_variants) { | ||
| 341 | const auto& variant = pair->first.variant; | ||
| 342 | programs.emplace(variant, pair->second); | ||
| 343 | } | ||
| 344 | } | ||
| 345 | } | ||
| 316 | 346 | ||
| 317 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 347 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 318 | Maxwell::ShaderProgram program_type, | 348 | Maxwell::ShaderProgram program_type, |
| 319 | ProgramCode&& program_code, | 349 | ProgramCode program_code, ProgramCode program_code_b) { |
| 320 | ProgramCode&& program_code_b) { | ||
| 321 | const auto code_size{CalculateProgramSize(program_code)}; | ||
| 322 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | ||
| 323 | auto result{CreateProgram(params.system, params.device, GetProgramType(program_type), | ||
| 324 | program_code, program_code_b)}; | ||
| 325 | if (result.first.empty()) { | ||
| 326 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | ||
| 327 | return {}; | ||
| 328 | } | ||
| 329 | |||
| 330 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 350 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 331 | params.unique_identifier, GetProgramType(program_type), | 351 | params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); |
| 332 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), | 352 | |
| 333 | std::move(program_code), std::move(program_code_b))); | 353 | ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); |
| 334 | 354 | const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | |
| 335 | return std::shared_ptr<CachedShader>( | 355 | // TODO(Rodrigo): Handle VertexA shaders |
| 336 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | 356 | // std::optional<ShaderIR> ir_b; |
| 337 | } | 357 | // if (!program_code_b.empty()) { |
| 338 | 358 | // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); | |
| 339 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 359 | // } |
| 340 | Maxwell::ShaderProgram program_type, | ||
| 341 | GLShader::ProgramResult result) { | ||
| 342 | return std::shared_ptr<CachedShader>( | 360 | return std::shared_ptr<CachedShader>( |
| 343 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | 361 | new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), |
| 362 | std::move(program_code), std::move(program_code_b))); | ||
| 344 | } | 363 | } |
| 345 | 364 | ||
| 346 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | 365 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 347 | auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})}; | 366 | params.disk_cache.SaveRaw( |
| 367 | ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); | ||
| 348 | 368 | ||
| 349 | const auto code_size{CalculateProgramSize(code)}; | 369 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); |
| 350 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | 370 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); |
| 351 | static_cast<u32>(code_size / sizeof(u64)), 0, | 371 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 352 | std::move(code), {})); | 372 | params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); |
| 353 | |||
| 354 | return std::shared_ptr<CachedShader>( | ||
| 355 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 356 | } | 373 | } |
| 357 | 374 | ||
| 358 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | 375 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 359 | GLShader::ProgramResult result) { | 376 | const UnspecializedShader& unspecialized) { |
| 360 | return std::shared_ptr<CachedShader>( | 377 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, |
| 361 | new CachedShader(params, ProgramType::Compute, std::move(result))); | 378 | unspecialized.entries, unspecialized.code, |
| 379 | unspecialized.code_b)); | ||
| 362 | } | 380 | } |
| 363 | 381 | ||
| 364 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 382 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 365 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 383 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| 366 | auto& program = entry->second; | 384 | auto& program = entry->second; |
| 367 | if (is_cache_miss) { | 385 | if (is_cache_miss) { |
| 368 | program = TryLoadProgram(variant); | 386 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
| 369 | if (!program) { | 387 | if (program_type == ProgramType::Compute) { |
| 370 | program = SpecializeShader(code, entries, program_type, variant); | 388 | engine = &system.GPU().KeplerCompute(); |
| 371 | disk_cache.SaveUsage(GetUsage(variant)); | 389 | } else { |
| 390 | engine = &system.GPU().Maxwell3D(); | ||
| 372 | } | 391 | } |
| 392 | ConstBufferLocker locker(GetEnginesShaderType(program_type), *engine); | ||
| 393 | program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, | ||
| 394 | variant, locker); | ||
| 395 | disk_cache.SaveUsage(GetUsage(variant)); | ||
| 373 | 396 | ||
| 374 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | 397 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 375 | } | 398 | } |
| @@ -385,14 +408,6 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 385 | return {program->handle, base_bindings}; | 408 | return {program->handle, base_bindings}; |
| 386 | } | 409 | } |
| 387 | 410 | ||
| 388 | CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { | ||
| 389 | const auto found = precompiled_programs.find(GetUsage(variant)); | ||
| 390 | if (found == precompiled_programs.end()) { | ||
| 391 | return {}; | ||
| 392 | } | ||
| 393 | return found->second; | ||
| 394 | } | ||
| 395 | |||
| 396 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { | 411 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { |
| 397 | ShaderDiskCacheUsage usage; | 412 | ShaderDiskCacheUsage usage; |
| 398 | usage.unique_identifier = unique_identifier; | 413 | usage.unique_identifier = unique_identifier; |
| @@ -412,18 +427,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 412 | return; | 427 | return; |
| 413 | } | 428 | } |
| 414 | const auto [raws, shader_usages] = *transferable; | 429 | const auto [raws, shader_usages] = *transferable; |
| 415 | 430 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | |
| 416 | auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); | ||
| 417 | |||
| 418 | const auto supported_formats{GetSupportedFormats()}; | ||
| 419 | const auto unspecialized_shaders{ | ||
| 420 | GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; | ||
| 421 | if (stop_loading) { | ||
| 422 | return; | 431 | return; |
| 423 | } | 432 | } |
| 424 | 433 | ||
| 425 | // Track if precompiled cache was altered during loading to know if we have to serialize the | 434 | const auto dumps = disk_cache.LoadPrecompiled(); |
| 426 | // virtual precompiled cache file back to the hard drive | 435 | const auto supported_formats = GetSupportedFormats(); |
| 436 | |||
| 437 | // Track if precompiled cache was altered during loading to know if we have to | ||
| 438 | // serialize the virtual precompiled cache file back to the hard drive | ||
| 427 | bool precompiled_cache_altered = false; | 439 | bool precompiled_cache_altered = false; |
| 428 | 440 | ||
| 429 | // Inform the frontend about shader build initialization | 441 | // Inform the frontend about shader build initialization |
| @@ -446,9 +458,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 446 | return; | 458 | return; |
| 447 | } | 459 | } |
| 448 | const auto& usage{shader_usages[i]}; | 460 | const auto& usage{shader_usages[i]}; |
| 449 | LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", | ||
| 450 | usage.unique_identifier, i, shader_usages.size()); | ||
| 451 | |||
| 452 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 461 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; |
| 453 | const auto dump{dumps.find(usage)}; | 462 | const auto dump{dumps.find(usage)}; |
| 454 | 463 | ||
| @@ -462,21 +471,27 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 462 | } | 471 | } |
| 463 | } | 472 | } |
| 464 | if (!shader) { | 473 | if (!shader) { |
| 465 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, | 474 | ConstBufferLocker locker(GetEnginesShaderType(unspecialized.program_type)); |
| 466 | unspecialized.program_type, usage.variant, true); | 475 | shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, |
| 476 | unspecialized.code, unspecialized.code_b, usage.variant, | ||
| 477 | locker, true); | ||
| 467 | } | 478 | } |
| 468 | 479 | ||
| 469 | std::scoped_lock lock(mutex); | 480 | std::scoped_lock lock{mutex}; |
| 470 | if (callback) { | 481 | if (callback) { |
| 471 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 482 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 472 | shader_usages.size()); | 483 | shader_usages.size()); |
| 473 | } | 484 | } |
| 474 | 485 | ||
| 475 | precompiled_programs.emplace(usage, std::move(shader)); | 486 | precompiled_programs.emplace(usage, std::move(shader)); |
| 487 | |||
| 488 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 489 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 490 | precompiled_programs.find(usage)); | ||
| 476 | } | 491 | } |
| 477 | }; | 492 | }; |
| 478 | 493 | ||
| 479 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; | 494 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 480 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 495 | const std::size_t bucket_size{shader_usages.size() / num_workers}; |
| 481 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 496 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 482 | std::vector<std::thread> threads(num_workers); | 497 | std::vector<std::thread> threads(num_workers); |
| @@ -496,7 +511,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 496 | if (compilation_failed) { | 511 | if (compilation_failed) { |
| 497 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 512 | // Invalidate the precompiled cache if a shader dumped shader was rejected |
| 498 | disk_cache.InvalidatePrecompiled(); | 513 | disk_cache.InvalidatePrecompiled(); |
| 499 | dumps.clear(); | ||
| 500 | precompiled_cache_altered = true; | 514 | precompiled_cache_altered = true; |
| 501 | return; | 515 | return; |
| 502 | } | 516 | } |
| @@ -504,8 +518,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 504 | return; | 518 | return; |
| 505 | } | 519 | } |
| 506 | 520 | ||
| 507 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before | 521 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 508 | // precompiling them | 522 | // before precompiling them |
| 509 | 523 | ||
| 510 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 524 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { |
| 511 | const auto& usage{shader_usages[i]}; | 525 | const auto& usage{shader_usages[i]}; |
| @@ -521,9 +535,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 521 | } | 535 | } |
| 522 | } | 536 | } |
| 523 | 537 | ||
| 524 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | 538 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { |
| 525 | const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { | 539 | const auto it = precompiled_variants.find(unique_identifier); |
| 540 | return it == precompiled_variants.end() ? nullptr : &it->second; | ||
| 541 | } | ||
| 526 | 542 | ||
| 543 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 544 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 527 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | 545 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { |
| 528 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | 546 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); |
| 529 | return {}; | 547 | return {}; |
| @@ -545,56 +563,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 545 | return shader; | 563 | return shader; |
| 546 | } | 564 | } |
| 547 | 565 | ||
| 548 | std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( | 566 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( |
| 549 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | 567 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, |
| 550 | const std::vector<ShaderDiskCacheRaw>& raws, | 568 | const std::vector<ShaderDiskCacheRaw>& raws) { |
| 551 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { | ||
| 552 | std::unordered_map<u64, UnspecializedShader> unspecialized; | ||
| 553 | |||
| 554 | if (callback) { | 569 | if (callback) { |
| 555 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | 570 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); |
| 556 | } | 571 | } |
| 557 | 572 | ||
| 558 | for (std::size_t i = 0; i < raws.size(); ++i) { | 573 | for (std::size_t i = 0; i < raws.size(); ++i) { |
| 559 | if (stop_loading) { | 574 | if (stop_loading) { |
| 560 | return {}; | 575 | return false; |
| 561 | } | 576 | } |
| 562 | const auto& raw{raws[i]}; | 577 | const auto& raw{raws[i]}; |
| 563 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | 578 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; |
| 564 | const u64 calculated_hash{ | 579 | const u64 calculated_hash{ |
| 565 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; | 580 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; |
| 566 | if (unique_identifier != calculated_hash) { | 581 | if (unique_identifier != calculated_hash) { |
| 567 | LOG_ERROR( | 582 | LOG_ERROR(Render_OpenGL, |
| 568 | Render_OpenGL, | 583 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " |
| 569 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", | 584 | "removing shader cache", |
| 570 | raw.GetUniqueIdentifier(), calculated_hash); | 585 | raw.GetUniqueIdentifier(), calculated_hash); |
| 571 | disk_cache.InvalidateTransferable(); | 586 | disk_cache.InvalidateTransferable(); |
| 572 | return {}; | 587 | return false; |
| 573 | } | 588 | } |
| 574 | 589 | ||
| 575 | GLShader::ProgramResult result; | 590 | const u32 main_offset = |
| 576 | if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { | 591 | raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 577 | // If it's stored in the precompiled file, avoid decompiling it here | 592 | ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); |
| 578 | const auto& stored_decompiled{it->second}; | 593 | const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); |
| 579 | result = {stored_decompiled.code, stored_decompiled.entries}; | 594 | // TODO(Rodrigo): Handle VertexA shaders |
| 580 | } else { | 595 | // std::optional<ShaderIR> ir_b; |
| 581 | // Otherwise decompile the shader at boot and save the result to the decompiled file | 596 | // if (raw.HasProgramA()) { |
| 582 | result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(), | 597 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); |
| 583 | raw.GetProgramCodeB()); | 598 | // } |
| 584 | disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); | 599 | |
| 585 | } | 600 | UnspecializedShader unspecialized; |
| 586 | 601 | unspecialized.entries = GLShader::GetEntries(ir); | |
| 587 | precompiled_shaders.insert({unique_identifier, result}); | 602 | unspecialized.program_type = raw.GetProgramType(); |
| 588 | 603 | unspecialized.code = raw.GetProgramCode(); | |
| 589 | unspecialized.insert( | 604 | unspecialized.code_b = raw.GetProgramCodeB(); |
| 590 | {raw.GetUniqueIdentifier(), | 605 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); |
| 591 | {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); | ||
| 592 | 606 | ||
| 593 | if (callback) { | 607 | if (callback) { |
| 594 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | 608 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); |
| 595 | } | 609 | } |
| 596 | } | 610 | } |
| 597 | return unspecialized; | 611 | return true; |
| 598 | } | 612 | } |
| 599 | 613 | ||
| 600 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 614 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| @@ -603,37 +617,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 603 | } | 617 | } |
| 604 | 618 | ||
| 605 | auto& memory_manager{system.GPU().MemoryManager()}; | 619 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 606 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 620 | const GPUVAddr address{GetShaderAddress(system, program)}; |
| 607 | 621 | ||
| 608 | // Look up shader in the cache based on address | 622 | // Look up shader in the cache based on address |
| 609 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 623 | const auto host_ptr{memory_manager.GetPointer(address)}; |
| 610 | Shader shader{TryGet(host_ptr)}; | 624 | Shader shader{TryGet(host_ptr)}; |
| 611 | if (shader) { | 625 | if (shader) { |
| 612 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 626 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 613 | } | 627 | } |
| 614 | 628 | ||
| 615 | // No shader found - create a new one | 629 | // No shader found - create a new one |
| 616 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 630 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; |
| 617 | ProgramCode program_code_b; | 631 | ProgramCode code_b; |
| 618 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; | 632 | if (program == Maxwell::ShaderProgram::VertexA) { |
| 619 | if (is_program_a) { | 633 | const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 620 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 634 | code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); |
| 621 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 635 | } |
| 622 | memory_manager.GetPointer(program_addr_b)); | 636 | |
| 623 | } | 637 | const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); |
| 624 | 638 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | |
| 625 | const auto unique_identifier = | 639 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 626 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | 640 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
| 627 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 641 | cpu_addr, host_ptr, unique_identifier}; |
| 628 | const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, | 642 | |
| 629 | host_ptr, unique_identifier}; | 643 | const auto found = unspecialized_shaders.find(unique_identifier); |
| 630 | 644 | if (found == unspecialized_shaders.end()) { | |
| 631 | const auto found = precompiled_shaders.find(unique_identifier); | 645 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 632 | if (found == precompiled_shaders.end()) { | 646 | std::move(code_b)); |
| 633 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), | ||
| 634 | std::move(program_code_b)); | ||
| 635 | } else { | 647 | } else { |
| 636 | shader = CachedShader::CreateStageFromCache(params, program, found->second); | 648 | shader = CachedShader::CreateFromCache(params, found->second); |
| 637 | } | 649 | } |
| 638 | Register(shader); | 650 | Register(shader); |
| 639 | 651 | ||
| @@ -651,15 +663,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 651 | // No kernel found - create a new one | 663 | // No kernel found - create a new one |
| 652 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 664 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 653 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | 665 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; |
| 666 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 654 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 667 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 655 | const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr, | 668 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
| 656 | host_ptr, unique_identifier}; | 669 | cpu_addr, host_ptr, unique_identifier}; |
| 657 | 670 | ||
| 658 | const auto found = precompiled_shaders.find(unique_identifier); | 671 | const auto found = unspecialized_shaders.find(unique_identifier); |
| 659 | if (found == precompiled_shaders.end()) { | 672 | if (found == unspecialized_shaders.end()) { |
| 660 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 673 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 661 | } else { | 674 | } else { |
| 662 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | 675 | kernel = CachedShader::CreateFromCache(params, found->second); |
| 663 | } | 676 | } |
| 664 | 677 | ||
| 665 | Register(kernel); | 678 | Register(kernel); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6ff78f005..700a83853 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -8,9 +8,10 @@ | |||
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <bitset> | 9 | #include <bitset> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <set> | 11 | #include <string> |
| 12 | #include <tuple> | 12 | #include <tuple> |
| 13 | #include <unordered_map> | 13 | #include <unordered_map> |
| 14 | #include <unordered_set> | ||
| 14 | #include <vector> | 15 | #include <vector> |
| 15 | 16 | ||
| 16 | #include <glad/glad.h> | 17 | #include <glad/glad.h> |
| @@ -20,6 +21,7 @@ | |||
| 20 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 24 | #include "video_core/shader/shader_ir.h" | ||
| 23 | 25 | ||
| 24 | namespace Core { | 26 | namespace Core { |
| 25 | class System; | 27 | class System; |
| @@ -40,12 +42,19 @@ using Shader = std::shared_ptr<CachedShader>; | |||
| 40 | using CachedProgram = std::shared_ptr<OGLProgram>; | 42 | using CachedProgram = std::shared_ptr<OGLProgram>; |
| 41 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 42 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 44 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; |
| 43 | using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | 45 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; |
| 46 | |||
| 47 | struct UnspecializedShader { | ||
| 48 | GLShader::ShaderEntries entries; | ||
| 49 | ProgramType program_type; | ||
| 50 | ProgramCode code; | ||
| 51 | ProgramCode code_b; | ||
| 52 | }; | ||
| 44 | 53 | ||
| 45 | struct ShaderParameters { | 54 | struct ShaderParameters { |
| 46 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 47 | const PrecompiledPrograms& precompiled_programs; | ||
| 48 | Core::System& system; | 55 | Core::System& system; |
| 56 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 57 | const PrecompiledVariants* precompiled_variants; | ||
| 49 | const Device& device; | 58 | const Device& device; |
| 50 | VAddr cpu_addr; | 59 | VAddr cpu_addr; |
| 51 | u8* host_ptr; | 60 | u8* host_ptr; |
| @@ -56,23 +65,18 @@ class CachedShader final : public RasterizerCacheObject { | |||
| 56 | public: | 65 | public: |
| 57 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 66 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 58 | Maxwell::ShaderProgram program_type, | 67 | Maxwell::ShaderProgram program_type, |
| 59 | ProgramCode&& program_code, ProgramCode&& program_code_b); | 68 | ProgramCode program_code, ProgramCode program_code_b); |
| 69 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||
| 60 | 70 | ||
| 61 | static Shader CreateStageFromCache(const ShaderParameters& params, | 71 | static Shader CreateFromCache(const ShaderParameters& params, |
| 62 | Maxwell::ShaderProgram program_type, | 72 | const UnspecializedShader& unspecialized); |
| 63 | GLShader::ProgramResult result); | ||
| 64 | |||
| 65 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 66 | |||
| 67 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 68 | GLShader::ProgramResult result); | ||
| 69 | 73 | ||
| 70 | VAddr GetCpuAddr() const override { | 74 | VAddr GetCpuAddr() const override { |
| 71 | return cpu_addr; | 75 | return cpu_addr; |
| 72 | } | 76 | } |
| 73 | 77 | ||
| 74 | std::size_t GetSizeInBytes() const override { | 78 | std::size_t GetSizeInBytes() const override { |
| 75 | return shader_length; | 79 | return program_code.size() * sizeof(u64); |
| 76 | } | 80 | } |
| 77 | 81 | ||
| 78 | /// Gets the shader entries for the shader | 82 | /// Gets the shader entries for the shader |
| @@ -85,21 +89,24 @@ public: | |||
| 85 | 89 | ||
| 86 | private: | 90 | private: |
| 87 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, | 91 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 88 | GLShader::ProgramResult result); | 92 | GLShader::ShaderEntries entries, ProgramCode program_code, |
| 89 | 93 | ProgramCode program_code_b); | |
| 90 | CachedProgram TryLoadProgram(const ProgramVariant& variant) const; | ||
| 91 | 94 | ||
| 92 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; | 95 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |
| 93 | 96 | ||
| 97 | Core::System& system; | ||
| 98 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 99 | const Device& device; | ||
| 100 | |||
| 94 | VAddr cpu_addr{}; | 101 | VAddr cpu_addr{}; |
| 102 | |||
| 95 | u64 unique_identifier{}; | 103 | u64 unique_identifier{}; |
| 96 | ProgramType program_type{}; | 104 | ProgramType program_type{}; |
| 97 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 98 | const PrecompiledPrograms& precompiled_programs; | ||
| 99 | 105 | ||
| 100 | GLShader::ShaderEntries entries; | 106 | GLShader::ShaderEntries entries; |
| 101 | std::string code; | 107 | |
| 102 | std::size_t shader_length{}; | 108 | ProgramCode program_code; |
| 109 | ProgramCode program_code_b; | ||
| 103 | 110 | ||
| 104 | std::unordered_map<ProgramVariant, CachedProgram> programs; | 111 | std::unordered_map<ProgramVariant, CachedProgram> programs; |
| 105 | }; | 112 | }; |
| @@ -124,21 +131,26 @@ protected: | |||
| 124 | void FlushObjectInner(const Shader& object) override {} | 131 | void FlushObjectInner(const Shader& object) override {} |
| 125 | 132 | ||
| 126 | private: | 133 | private: |
| 127 | std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( | 134 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, |
| 128 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | 135 | const VideoCore::DiskResourceLoadCallback& callback, |
| 129 | const std::vector<ShaderDiskCacheRaw>& raws, | 136 | const std::vector<ShaderDiskCacheRaw>& raws); |
| 130 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); | ||
| 131 | 137 | ||
| 132 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | 138 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, |
| 133 | const std::set<GLenum>& supported_formats); | 139 | const std::unordered_set<GLenum>& supported_formats); |
| 140 | |||
| 141 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 134 | 142 | ||
| 135 | Core::System& system; | 143 | Core::System& system; |
| 136 | Core::Frontend::EmuWindow& emu_window; | 144 | Core::Frontend::EmuWindow& emu_window; |
| 137 | const Device& device; | 145 | const Device& device; |
| 146 | |||
| 138 | ShaderDiskCacheOpenGL disk_cache; | 147 | ShaderDiskCacheOpenGL disk_cache; |
| 139 | 148 | ||
| 140 | PrecompiledShaders precompiled_shaders; | ||
| 141 | PrecompiledPrograms precompiled_programs; | 149 | PrecompiledPrograms precompiled_programs; |
| 150 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 151 | |||
| 152 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 153 | |||
| 142 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 154 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 143 | }; | 155 | }; |
| 144 | 156 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 71d7389cb..030550c53 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -415,27 +415,6 @@ public: | |||
| 415 | return code.GetResult(); | 415 | return code.GetResult(); |
| 416 | } | 416 | } |
| 417 | 417 | ||
| 418 | ShaderEntries GetShaderEntries() const { | ||
| 419 | ShaderEntries entries; | ||
| 420 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 421 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 422 | cbuf.first); | ||
| 423 | } | ||
| 424 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 425 | entries.samplers.emplace_back(sampler); | ||
| 426 | } | ||
| 427 | for (const auto& [offset, image] : ir.GetImages()) { | ||
| 428 | entries.images.emplace_back(image); | ||
| 429 | } | ||
| 430 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 431 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, | ||
| 432 | usage.is_read, usage.is_written); | ||
| 433 | } | ||
| 434 | entries.clip_distances = ir.GetClipDistances(); | ||
| 435 | entries.shader_length = ir.GetLength(); | ||
| 436 | return entries; | ||
| 437 | } | ||
| 438 | |||
| 439 | private: | 418 | private: |
| 440 | friend class ASTDecompiler; | 419 | friend class ASTDecompiler; |
| 441 | friend class ExprDecompiler; | 420 | friend class ExprDecompiler; |
| @@ -2481,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2481 | 2460 | ||
| 2482 | } // Anonymous namespace | 2461 | } // Anonymous namespace |
| 2483 | 2462 | ||
| 2463 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||
| 2464 | ShaderEntries entries; | ||
| 2465 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 2466 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 2467 | cbuf.first); | ||
| 2468 | } | ||
| 2469 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 2470 | entries.samplers.emplace_back(sampler); | ||
| 2471 | } | ||
| 2472 | for (const auto& [offset, image] : ir.GetImages()) { | ||
| 2473 | entries.images.emplace_back(image); | ||
| 2474 | } | ||
| 2475 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 2476 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||
| 2477 | usage.is_written); | ||
| 2478 | } | ||
| 2479 | entries.clip_distances = ir.GetClipDistances(); | ||
| 2480 | entries.shader_length = ir.GetLength(); | ||
| 2481 | return entries; | ||
| 2482 | } | ||
| 2483 | |||
| 2484 | std::string GetCommonDeclarations() { | 2484 | std::string GetCommonDeclarations() { |
| 2485 | return fmt::format( | 2485 | return R"(#define ftoi floatBitsToInt |
| 2486 | "#define ftoi floatBitsToInt\n" | 2486 | #define ftou floatBitsToUint |
| 2487 | "#define ftou floatBitsToUint\n" | 2487 | #define itof intBitsToFloat |
| 2488 | "#define itof intBitsToFloat\n" | 2488 | #define utof uintBitsToFloat |
| 2489 | "#define utof uintBitsToFloat\n\n" | 2489 | |
| 2490 | "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" | 2490 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { |
| 2491 | " bvec2 is_nan1 = isnan(pair1);\n" | 2491 | bvec2 is_nan1 = isnan(pair1); |
| 2492 | " bvec2 is_nan2 = isnan(pair2);\n" | 2492 | bvec2 is_nan2 = isnan(pair2); |
| 2493 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " | 2493 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); |
| 2494 | "is_nan2.y);\n" | 2494 | } |
| 2495 | "}}\n\n"); | 2495 | )"; |
| 2496 | } | 2496 | } |
| 2497 | 2497 | ||
| 2498 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | 2498 | std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 2499 | const std::string& suffix) { | 2499 | const std::string& suffix) { |
| 2500 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2500 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 2501 | decompiler.Decompile(); | 2501 | decompiler.Decompile(); |
| 2502 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; | 2502 | return decompiler.GetResult(); |
| 2503 | } | 2503 | } |
| 2504 | 2504 | ||
| 2505 | } // namespace OpenGL::GLShader | 2505 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001..fead2a51e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -34,10 +34,7 @@ enum class ProgramType : u32 { | |||
| 34 | 34 | ||
| 35 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 36 | 36 | ||
| 37 | struct ShaderEntries; | ||
| 38 | |||
| 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 40 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 41 | using SamplerEntry = VideoCommon::Shader::Sampler; | 38 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 42 | using ImageEntry = VideoCommon::Shader::Image; | 39 | using ImageEntry = VideoCommon::Shader::Image; |
| 43 | 40 | ||
| @@ -93,9 +90,11 @@ struct ShaderEntries { | |||
| 93 | std::size_t shader_length{}; | 90 | std::size_t shader_length{}; |
| 94 | }; | 91 | }; |
| 95 | 92 | ||
| 93 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | ||
| 94 | |||
| 96 | std::string GetCommonDeclarations(); | 95 | std::string GetCommonDeclarations(); |
| 97 | 96 | ||
| 98 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 97 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 99 | ProgramType stage, const std::string& suffix); | 98 | ProgramType stage, const std::string& suffix); |
| 100 | 99 | ||
| 101 | } // namespace OpenGL::GLShader | 100 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 74cc33476..ddc19dccd 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -29,12 +29,7 @@ enum class TransferableEntryKind : u32 { | |||
| 29 | Usage, | 29 | Usage, |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | enum class PrecompiledEntryKind : u32 { | 32 | constexpr u32 NativeVersion = 5; |
| 33 | Decompiled, | ||
| 34 | Dump, | ||
| 35 | }; | ||
| 36 | |||
| 37 | constexpr u32 NativeVersion = 4; | ||
| 38 | 33 | ||
| 39 | // Making sure sizes doesn't change by accident | 34 | // Making sure sizes doesn't change by accident |
| 40 | static_assert(sizeof(BaseBindings) == 16); | 35 | static_assert(sizeof(BaseBindings) == 16); |
| @@ -49,13 +44,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 49 | return hash; | 44 | return hash; |
| 50 | } | 45 | } |
| 51 | 46 | ||
| 52 | } // namespace | 47 | } // Anonymous namespace |
| 53 | 48 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 49 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | ||
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 50 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 51 | : unique_identifier{unique_identifier}, program_type{program_type}, |
| 58 | program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, | ||
| 59 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} | 52 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} |
| 60 | 53 | ||
| 61 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; |
| @@ -90,15 +83,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | |||
| 90 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 83 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { |
| 91 | if (file.WriteObject(unique_identifier) != 1 || | 84 | if (file.WriteObject(unique_identifier) != 1 || |
| 92 | file.WriteObject(static_cast<u32>(program_type)) != 1 || | 85 | file.WriteObject(static_cast<u32>(program_type)) != 1 || |
| 93 | file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { | 86 | file.WriteObject(static_cast<u32>(program_code.size())) != 1 || |
| 87 | file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { | ||
| 94 | return false; | 88 | return false; |
| 95 | } | 89 | } |
| 96 | 90 | ||
| 97 | if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) | 91 | if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) |
| 98 | return false; | 92 | return false; |
| 99 | 93 | ||
| 100 | if (HasProgramA() && | 94 | if (HasProgramA() && |
| 101 | file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { | 95 | file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { |
| 102 | return false; | 96 | return false; |
| 103 | } | 97 | } |
| 104 | return true; | 98 | return true; |
| @@ -186,13 +180,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 186 | return {{std::move(raws), std::move(usages)}}; | 180 | return {{std::move(raws), std::move(usages)}}; |
| 187 | } | 181 | } |
| 188 | 182 | ||
| 189 | std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> | 183 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> |
| 190 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | 184 | ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 191 | if (!is_usable) { | 185 | if (!is_usable) { |
| 192 | return {}; | 186 | return {}; |
| 193 | } | 187 | } |
| 194 | 188 | ||
| 195 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); | 189 | std::string path = GetPrecompiledPath(); |
| 190 | FileUtil::IOFile file(path, "rb"); | ||
| 196 | if (!file.IsOpen()) { | 191 | if (!file.IsOpen()) { |
| 197 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 192 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", |
| 198 | GetTitleID()); | 193 | GetTitleID()); |
| @@ -211,7 +206,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||
| 211 | return *result; | 206 | return *result; |
| 212 | } | 207 | } |
| 213 | 208 | ||
| 214 | std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> | 209 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> |
| 215 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 210 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { |
| 216 | // Read compressed file from disk and decompress to virtual precompiled cache file | 211 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 217 | std::vector<u8> compressed(file.GetSize()); | 212 | std::vector<u8> compressed(file.GetSize()); |
| @@ -231,238 +226,31 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 231 | return {}; | 226 | return {}; |
| 232 | } | 227 | } |
| 233 | 228 | ||
| 234 | std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; | ||
| 235 | ShaderDumpsMap dumps; | 229 | ShaderDumpsMap dumps; |
| 236 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 230 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 237 | PrecompiledEntryKind kind{}; | 231 | ShaderDiskCacheUsage usage; |
| 238 | if (!LoadObjectFromPrecompiled(kind)) { | 232 | if (!LoadObjectFromPrecompiled(usage)) { |
| 239 | return {}; | 233 | return {}; |
| 240 | } | 234 | } |
| 241 | 235 | ||
| 242 | switch (kind) { | 236 | ShaderDiskCacheDump dump; |
| 243 | case PrecompiledEntryKind::Decompiled: { | 237 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { |
| 244 | u64 unique_identifier{}; | ||
| 245 | if (!LoadObjectFromPrecompiled(unique_identifier)) { | ||
| 246 | return {}; | ||
| 247 | } | ||
| 248 | |||
| 249 | auto entry = LoadDecompiledEntry(); | ||
| 250 | if (!entry) { | ||
| 251 | return {}; | ||
| 252 | } | ||
| 253 | decompiled.insert({unique_identifier, std::move(*entry)}); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case PrecompiledEntryKind::Dump: { | ||
| 257 | ShaderDiskCacheUsage usage; | ||
| 258 | if (!LoadObjectFromPrecompiled(usage)) { | ||
| 259 | return {}; | ||
| 260 | } | ||
| 261 | |||
| 262 | ShaderDiskCacheDump dump; | ||
| 263 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 264 | return {}; | ||
| 265 | } | ||
| 266 | |||
| 267 | u32 binary_length{}; | ||
| 268 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 269 | return {}; | ||
| 270 | } | ||
| 271 | |||
| 272 | dump.binary.resize(binary_length); | ||
| 273 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | ||
| 274 | return {}; | ||
| 275 | } | ||
| 276 | |||
| 277 | dumps.insert({usage, dump}); | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | default: | ||
| 281 | return {}; | 238 | return {}; |
| 282 | } | 239 | } |
| 283 | } | ||
| 284 | return {{decompiled, dumps}}; | ||
| 285 | } | ||
| 286 | 240 | ||
| 287 | std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { | 241 | u32 binary_length{}; |
| 288 | u32 code_size{}; | 242 | if (!LoadObjectFromPrecompiled(binary_length)) { |
| 289 | if (!LoadObjectFromPrecompiled(code_size)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | |||
| 293 | std::string code(code_size, '\0'); | ||
| 294 | if (!LoadArrayFromPrecompiled(code.data(), code.size())) { | ||
| 295 | return {}; | ||
| 296 | } | ||
| 297 | |||
| 298 | ShaderDiskCacheDecompiled entry; | ||
| 299 | entry.code = std::move(code); | ||
| 300 | |||
| 301 | u32 const_buffers_count{}; | ||
| 302 | if (!LoadObjectFromPrecompiled(const_buffers_count)) { | ||
| 303 | return {}; | ||
| 304 | } | ||
| 305 | |||
| 306 | for (u32 i = 0; i < const_buffers_count; ++i) { | ||
| 307 | u32 max_offset{}; | ||
| 308 | u32 index{}; | ||
| 309 | bool is_indirect{}; | ||
| 310 | if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || | ||
| 311 | !LoadObjectFromPrecompiled(is_indirect)) { | ||
| 312 | return {}; | 243 | return {}; |
| 313 | } | 244 | } |
| 314 | entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); | ||
| 315 | } | ||
| 316 | 245 | ||
| 317 | u32 samplers_count{}; | 246 | dump.binary.resize(binary_length); |
| 318 | if (!LoadObjectFromPrecompiled(samplers_count)) { | 247 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { |
| 319 | return {}; | ||
| 320 | } | ||
| 321 | |||
| 322 | for (u32 i = 0; i < samplers_count; ++i) { | ||
| 323 | u64 offset{}; | ||
| 324 | u64 index{}; | ||
| 325 | u32 type{}; | ||
| 326 | bool is_array{}; | ||
| 327 | bool is_shadow{}; | ||
| 328 | bool is_bindless{}; | ||
| 329 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 330 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || | ||
| 331 | !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { | ||
| 332 | return {}; | 248 | return {}; |
| 333 | } | 249 | } |
| 334 | entry.entries.samplers.emplace_back( | ||
| 335 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | ||
| 336 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); | ||
| 337 | } | ||
| 338 | 250 | ||
| 339 | u32 images_count{}; | 251 | dumps.emplace(usage, dump); |
| 340 | if (!LoadObjectFromPrecompiled(images_count)) { | ||
| 341 | return {}; | ||
| 342 | } | ||
| 343 | for (u32 i = 0; i < images_count; ++i) { | ||
| 344 | u64 offset{}; | ||
| 345 | u64 index{}; | ||
| 346 | u32 type{}; | ||
| 347 | u8 is_bindless{}; | ||
| 348 | u8 is_written{}; | ||
| 349 | u8 is_read{}; | ||
| 350 | u8 is_atomic{}; | ||
| 351 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 352 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || | ||
| 353 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || | ||
| 354 | !LoadObjectFromPrecompiled(is_atomic)) { | ||
| 355 | return {}; | ||
| 356 | } | ||
| 357 | entry.entries.images.emplace_back( | ||
| 358 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | ||
| 359 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, | ||
| 360 | is_read != 0, is_atomic != 0); | ||
| 361 | } | ||
| 362 | |||
| 363 | u32 global_memory_count{}; | ||
| 364 | if (!LoadObjectFromPrecompiled(global_memory_count)) { | ||
| 365 | return {}; | ||
| 366 | } | 252 | } |
| 367 | for (u32 i = 0; i < global_memory_count; ++i) { | 253 | return dumps; |
| 368 | u32 cbuf_index{}; | ||
| 369 | u32 cbuf_offset{}; | ||
| 370 | bool is_read{}; | ||
| 371 | bool is_written{}; | ||
| 372 | if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || | ||
| 373 | !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { | ||
| 374 | return {}; | ||
| 375 | } | ||
| 376 | entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, | ||
| 377 | is_written); | ||
| 378 | } | ||
| 379 | |||
| 380 | for (auto& clip_distance : entry.entries.clip_distances) { | ||
| 381 | if (!LoadObjectFromPrecompiled(clip_distance)) { | ||
| 382 | return {}; | ||
| 383 | } | ||
| 384 | } | ||
| 385 | |||
| 386 | u64 shader_length{}; | ||
| 387 | if (!LoadObjectFromPrecompiled(shader_length)) { | ||
| 388 | return {}; | ||
| 389 | } | ||
| 390 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); | ||
| 391 | |||
| 392 | return entry; | ||
| 393 | } | ||
| 394 | |||
| 395 | bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, | ||
| 396 | const GLShader::ShaderEntries& entries) { | ||
| 397 | if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || | ||
| 398 | !SaveObjectToPrecompiled(unique_identifier) || | ||
| 399 | !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || | ||
| 400 | !SaveArrayToPrecompiled(code.data(), code.size())) { | ||
| 401 | return false; | ||
| 402 | } | ||
| 403 | |||
| 404 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { | ||
| 405 | return false; | ||
| 406 | } | ||
| 407 | for (const auto& cbuf : entries.const_buffers) { | ||
| 408 | if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || | ||
| 409 | !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || | ||
| 410 | !SaveObjectToPrecompiled(cbuf.IsIndirect())) { | ||
| 411 | return false; | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { | ||
| 416 | return false; | ||
| 417 | } | ||
| 418 | for (const auto& sampler : entries.samplers) { | ||
| 419 | if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || | ||
| 420 | !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || | ||
| 421 | !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || | ||
| 422 | !SaveObjectToPrecompiled(sampler.IsArray()) || | ||
| 423 | !SaveObjectToPrecompiled(sampler.IsShadow()) || | ||
| 424 | !SaveObjectToPrecompiled(sampler.IsBindless())) { | ||
| 425 | return false; | ||
| 426 | } | ||
| 427 | } | ||
| 428 | |||
| 429 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { | ||
| 430 | return false; | ||
| 431 | } | ||
| 432 | for (const auto& image : entries.images) { | ||
| 433 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | ||
| 434 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | ||
| 435 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | ||
| 436 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || | ||
| 437 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || | ||
| 438 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || | ||
| 439 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { | ||
| 440 | return false; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | |||
| 444 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { | ||
| 445 | return false; | ||
| 446 | } | ||
| 447 | for (const auto& gmem : entries.global_memory_entries) { | ||
| 448 | if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || | ||
| 449 | !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || | ||
| 450 | !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { | ||
| 451 | return false; | ||
| 452 | } | ||
| 453 | } | ||
| 454 | |||
| 455 | for (const bool clip_distance : entries.clip_distances) { | ||
| 456 | if (!SaveObjectToPrecompiled(clip_distance)) { | ||
| 457 | return false; | ||
| 458 | } | ||
| 459 | } | ||
| 460 | |||
| 461 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | ||
| 462 | return false; | ||
| 463 | } | ||
| 464 | |||
| 465 | return true; | ||
| 466 | } | 254 | } |
| 467 | 255 | ||
| 468 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 256 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
| @@ -532,28 +320,18 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | |||
| 532 | } | 320 | } |
| 533 | } | 321 | } |
| 534 | 322 | ||
| 535 | void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, | 323 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { |
| 536 | const GLShader::ShaderEntries& entries) { | ||
| 537 | if (!is_usable) { | 324 | if (!is_usable) { |
| 538 | return; | 325 | return; |
| 539 | } | 326 | } |
| 540 | 327 | ||
| 328 | // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||
| 329 | // when writing the dump. This should be done the moment I get access to write to the virtual | ||
| 330 | // file. | ||
| 541 | if (precompiled_cache_virtual_file.GetSize() == 0) { | 331 | if (precompiled_cache_virtual_file.GetSize() == 0) { |
| 542 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 332 | SavePrecompiledHeaderToVirtualPrecompiledCache(); |
| 543 | } | 333 | } |
| 544 | 334 | ||
| 545 | if (!SaveDecompiledFile(unique_identifier, code, entries)) { | ||
| 546 | LOG_ERROR(Render_OpenGL, | ||
| 547 | "Failed to save decompiled entry to the precompiled file - removing"); | ||
| 548 | InvalidatePrecompiled(); | ||
| 549 | } | ||
| 550 | } | ||
| 551 | |||
| 552 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { | ||
| 553 | if (!is_usable) { | ||
| 554 | return; | ||
| 555 | } | ||
| 556 | |||
| 557 | GLint binary_length{}; | 335 | GLint binary_length{}; |
| 558 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 336 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 559 | 337 | ||
| @@ -561,8 +339,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 561 | std::vector<u8> binary(binary_length); | 339 | std::vector<u8> binary(binary_length); |
| 562 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 340 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 563 | 341 | ||
| 564 | if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || | 342 | if (!SaveObjectToPrecompiled(usage) || |
| 565 | !SaveObjectToPrecompiled(usage) || | ||
| 566 | !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | 343 | !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || |
| 567 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | 344 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || |
| 568 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | 345 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { |
| @@ -574,8 +351,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 574 | } | 351 | } |
| 575 | 352 | ||
| 576 | FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | 353 | FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { |
| 577 | if (!EnsureDirectories()) | 354 | if (!EnsureDirectories()) { |
| 578 | return {}; | 355 | return {}; |
| 356 | } | ||
| 579 | 357 | ||
| 580 | const auto transferable_path{GetTransferablePath()}; | 358 | const auto transferable_path{GetTransferablePath()}; |
| 581 | const bool existed = FileUtil::Exists(transferable_path); | 359 | const bool existed = FileUtil::Exists(transferable_path); |
| @@ -607,8 +385,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | |||
| 607 | 385 | ||
| 608 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | 386 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { |
| 609 | precompiled_cache_virtual_file_offset = 0; | 387 | precompiled_cache_virtual_file_offset = 0; |
| 610 | const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | 388 | const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); |
| 611 | const std::vector<u8>& compressed = | 389 | const std::vector<u8> compressed = |
| 612 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | 390 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); |
| 613 | 391 | ||
| 614 | const auto precompiled_path{GetPrecompiledPath()}; | 392 | const auto precompiled_path{GetPrecompiledPath()}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 9595bd71b..61b46d728 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -123,8 +123,7 @@ namespace OpenGL { | |||
| 123 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 124 | public: | 124 | public: |
| 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 126 | u32 program_code_size, u32 program_code_size_b, | 126 | ProgramCode program_code, ProgramCode program_code_b = {}); |
| 127 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 128 | ShaderDiskCacheRaw(); | 127 | ShaderDiskCacheRaw(); |
| 129 | ~ShaderDiskCacheRaw(); | 128 | ~ShaderDiskCacheRaw(); |
| 130 | 129 | ||
| @@ -155,22 +154,14 @@ public: | |||
| 155 | private: | 154 | private: |
| 156 | u64 unique_identifier{}; | 155 | u64 unique_identifier{}; |
| 157 | ProgramType program_type{}; | 156 | ProgramType program_type{}; |
| 158 | u32 program_code_size{}; | ||
| 159 | u32 program_code_size_b{}; | ||
| 160 | 157 | ||
| 161 | ProgramCode program_code; | 158 | ProgramCode program_code; |
| 162 | ProgramCode program_code_b; | 159 | ProgramCode program_code_b; |
| 163 | }; | 160 | }; |
| 164 | 161 | ||
| 165 | /// Contains decompiled data from a shader | ||
| 166 | struct ShaderDiskCacheDecompiled { | ||
| 167 | std::string code; | ||
| 168 | GLShader::ShaderEntries entries; | ||
| 169 | }; | ||
| 170 | |||
| 171 | /// Contains an OpenGL dumped binary program | 162 | /// Contains an OpenGL dumped binary program |
| 172 | struct ShaderDiskCacheDump { | 163 | struct ShaderDiskCacheDump { |
| 173 | GLenum binary_format; | 164 | GLenum binary_format{}; |
| 174 | std::vector<u8> binary; | 165 | std::vector<u8> binary; |
| 175 | }; | 166 | }; |
| 176 | 167 | ||
| @@ -184,9 +175,7 @@ public: | |||
| 184 | LoadTransferable(); | 175 | LoadTransferable(); |
| 185 | 176 | ||
| 186 | /// Loads current game's precompiled cache. Invalidates on failure. | 177 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 187 | std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | 178 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); |
| 188 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | ||
| 189 | LoadPrecompiled(); | ||
| 190 | 179 | ||
| 191 | /// Removes the transferable (and precompiled) cache file. | 180 | /// Removes the transferable (and precompiled) cache file. |
| 192 | void InvalidateTransferable(); | 181 | void InvalidateTransferable(); |
| @@ -200,10 +189,6 @@ public: | |||
| 200 | /// Saves shader usage to the transferable file. Does not check for collisions. | 189 | /// Saves shader usage to the transferable file. Does not check for collisions. |
| 201 | void SaveUsage(const ShaderDiskCacheUsage& usage); | 190 | void SaveUsage(const ShaderDiskCacheUsage& usage); |
| 202 | 191 | ||
| 203 | /// Saves a decompiled entry to the precompiled file. Does not check for collisions. | ||
| 204 | void SaveDecompiled(u64 unique_identifier, const std::string& code, | ||
| 205 | const GLShader::ShaderEntries& entries); | ||
| 206 | |||
| 207 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 192 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 208 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 193 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); |
| 209 | 194 | ||
| @@ -212,18 +197,9 @@ public: | |||
| 212 | 197 | ||
| 213 | private: | 198 | private: |
| 214 | /// Loads the transferable cache. Returns empty on failure. | 199 | /// Loads the transferable cache. Returns empty on failure. |
| 215 | std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | 200 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> |
| 216 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> | ||
| 217 | LoadPrecompiledFile(FileUtil::IOFile& file); | 201 | LoadPrecompiledFile(FileUtil::IOFile& file); |
| 218 | 202 | ||
| 219 | /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on | ||
| 220 | /// failure. | ||
| 221 | std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); | ||
| 222 | |||
| 223 | /// Saves a decompiled entry to the passed file. Returns true on success. | ||
| 224 | bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, | ||
| 225 | const GLShader::ShaderEntries& entries); | ||
| 226 | |||
| 227 | /// Opens current game's transferable file and write it's header if it doesn't exist | 203 | /// Opens current game's transferable file and write it's header if it doesn't exist |
| 228 | FileUtil::IOFile AppendTransferableFile() const; | 204 | FileUtil::IOFile AppendTransferableFile() const; |
| 229 | 205 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 817c6e12c..0e22eede9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -16,18 +16,8 @@ using VideoCommon::Shader::CompilerSettings; | |||
| 16 | using VideoCommon::Shader::ProgramCode; | 16 | using VideoCommon::Shader::ProgramCode; |
| 17 | using VideoCommon::Shader::ShaderIR; | 17 | using VideoCommon::Shader::ShaderIR; |
| 18 | 18 | ||
| 19 | static constexpr u32 PROGRAM_OFFSET = 10; | 19 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { |
| 20 | static constexpr u32 COMPUTE_OFFSET = 0; | 20 | std::string out = GetCommonDeclarations(); |
| 21 | |||
| 22 | static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; | ||
| 23 | |||
| 24 | ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, | ||
| 25 | const ShaderSetup& setup) { | ||
| 26 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 27 | |||
| 28 | std::string out = "// Shader Unique Id: VS" + id + "\n\n"; | ||
| 29 | out += GetCommonDeclarations(); | ||
| 30 | |||
| 31 | out += R"( | 21 | out += R"( |
| 32 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | 22 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { |
| 33 | vec4 viewport_flip; | 23 | vec4 viewport_flip; |
| @@ -35,18 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 35 | }; | 25 | }; |
| 36 | 26 | ||
| 37 | )"; | 27 | )"; |
| 38 | 28 | const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; | |
| 39 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, | 29 | out += Decompile(device, ir, stage, "vertex"); |
| 40 | locker); | 30 | if (ir_b) { |
| 41 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | 31 | out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); |
| 42 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 43 | out += program.first; | ||
| 44 | |||
| 45 | if (setup.IsDualProgram()) { | ||
| 46 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, | ||
| 47 | settings, locker); | ||
| 48 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); | ||
| 49 | out += program_b.first; | ||
| 50 | } | 32 | } |
| 51 | 33 | ||
| 52 | out += R"( | 34 | out += R"( |
| @@ -54,7 +36,7 @@ void main() { | |||
| 54 | execute_vertex(); | 36 | execute_vertex(); |
| 55 | )"; | 37 | )"; |
| 56 | 38 | ||
| 57 | if (setup.IsDualProgram()) { | 39 | if (ir_b) { |
| 58 | out += " execute_vertex_b();"; | 40 | out += " execute_vertex_b();"; |
| 59 | } | 41 | } |
| 60 | 42 | ||
| @@ -68,18 +50,13 @@ void main() { | |||
| 68 | // Viewport can be flipped, which is unsupported by glViewport | 50 | // Viewport can be flipped, which is unsupported by glViewport |
| 69 | gl_Position.xy *= viewport_flip.xy; | 51 | gl_Position.xy *= viewport_flip.xy; |
| 70 | } | 52 | } |
| 71 | })"; | 53 | } |
| 72 | 54 | )"; | |
| 73 | return {std::move(out), std::move(program.second)}; | 55 | return out; |
| 74 | } | 56 | } |
| 75 | 57 | ||
| 76 | ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, | 58 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { |
| 77 | const ShaderSetup& setup) { | 59 | std::string out = GetCommonDeclarations(); |
| 78 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 79 | |||
| 80 | std::string out = "// Shader Unique Id: GS" + id + "\n\n"; | ||
| 81 | out += GetCommonDeclarations(); | ||
| 82 | |||
| 83 | out += R"( | 60 | out += R"( |
| 84 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | 61 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { |
| 85 | vec4 viewport_flip; | 62 | vec4 viewport_flip; |
| @@ -87,27 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 87 | }; | 64 | }; |
| 88 | 65 | ||
| 89 | )"; | 66 | )"; |
| 90 | 67 | out += Decompile(device, ir, ProgramType::Geometry, "geometry"); | |
| 91 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, | ||
| 92 | locker); | ||
| 93 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); | ||
| 94 | out += program.first; | ||
| 95 | 68 | ||
| 96 | out += R"( | 69 | out += R"( |
| 97 | void main() { | 70 | void main() { |
| 98 | execute_geometry(); | 71 | execute_geometry(); |
| 99 | };)"; | 72 | } |
| 100 | 73 | )"; | |
| 101 | return {std::move(out), std::move(program.second)}; | 74 | return out; |
| 102 | } | 75 | } |
| 103 | 76 | ||
| 104 | ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, | 77 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { |
| 105 | const ShaderSetup& setup) { | 78 | std::string out = GetCommonDeclarations(); |
| 106 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 107 | |||
| 108 | std::string out = "// Shader Unique Id: FS" + id + "\n\n"; | ||
| 109 | out += GetCommonDeclarations(); | ||
| 110 | |||
| 111 | out += R"( | 79 | out += R"( |
| 112 | layout (location = 0) out vec4 FragColor0; | 80 | layout (location = 0) out vec4 FragColor0; |
| 113 | layout (location = 1) out vec4 FragColor1; | 81 | layout (location = 1) out vec4 FragColor1; |
| @@ -124,39 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 124 | }; | 92 | }; |
| 125 | 93 | ||
| 126 | )"; | 94 | )"; |
| 127 | 95 | out += Decompile(device, ir, ProgramType::Fragment, "fragment"); | |
| 128 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings, | ||
| 129 | locker); | ||
| 130 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); | ||
| 131 | out += program.first; | ||
| 132 | 96 | ||
| 133 | out += R"( | 97 | out += R"( |
| 134 | void main() { | 98 | void main() { |
| 135 | execute_fragment(); | 99 | execute_fragment(); |
| 136 | } | 100 | } |
| 137 | |||
| 138 | )"; | 101 | )"; |
| 139 | return {std::move(out), std::move(program.second)}; | 102 | return out; |
| 140 | } | 103 | } |
| 141 | 104 | ||
| 142 | ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, | 105 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { |
| 143 | const ShaderSetup& setup) { | 106 | std::string out = GetCommonDeclarations(); |
| 144 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 107 | out += Decompile(device, ir, ProgramType::Compute, "compute"); |
| 145 | |||
| 146 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 147 | out += GetCommonDeclarations(); | ||
| 148 | |||
| 149 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings, | ||
| 150 | locker); | ||
| 151 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 152 | out += program.first; | ||
| 153 | |||
| 154 | out += R"( | 108 | out += R"( |
| 155 | void main() { | 109 | void main() { |
| 156 | execute_compute(); | 110 | execute_compute(); |
| 157 | } | 111 | } |
| 158 | )"; | 112 | )"; |
| 159 | return {std::move(out), std::move(program.second)}; | 113 | return out; |
| 160 | } | 114 | } |
| 161 | 115 | ||
| 162 | } // namespace OpenGL::GLShader | 116 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 05f157298..cba2be9f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -16,50 +16,19 @@ class Device; | |||
| 16 | 16 | ||
| 17 | namespace OpenGL::GLShader { | 17 | namespace OpenGL::GLShader { |
| 18 | 18 | ||
| 19 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 20 | using VideoCommon::Shader::ProgramCode; | 19 | using VideoCommon::Shader::ProgramCode; |
| 21 | 20 | using VideoCommon::Shader::ShaderIR; | |
| 22 | struct ShaderSetup { | ||
| 23 | explicit ShaderSetup(ProgramCode program_code) { | ||
| 24 | program.code = std::move(program_code); | ||
| 25 | } | ||
| 26 | |||
| 27 | struct { | ||
| 28 | ProgramCode code; | ||
| 29 | ProgramCode code_b; // Used for dual vertex shaders | ||
| 30 | u64 unique_identifier; | ||
| 31 | std::size_t size_a; | ||
| 32 | std::size_t size_b; | ||
| 33 | } program; | ||
| 34 | |||
| 35 | /// Used in scenarios where we have a dual vertex shaders | ||
| 36 | void SetProgramB(ProgramCode program_b) { | ||
| 37 | program.code_b = std::move(program_b); | ||
| 38 | has_program_b = true; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool IsDualProgram() const { | ||
| 42 | return has_program_b; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | bool has_program_b{}; | ||
| 47 | }; | ||
| 48 | 21 | ||
| 49 | /// Generates the GLSL vertex shader program source code for the given VS program | 22 | /// Generates the GLSL vertex shader program source code for the given VS program |
| 50 | ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device, | 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); |
| 51 | const ShaderSetup& setup); | ||
| 52 | 24 | ||
| 53 | /// Generates the GLSL geometry shader program source code for the given GS program | 25 | /// Generates the GLSL geometry shader program source code for the given GS program |
| 54 | ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device, | 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); |
| 55 | const ShaderSetup& setup); | ||
| 56 | 27 | ||
| 57 | /// Generates the GLSL fragment shader program source code for the given FS program | 28 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 58 | ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device, | 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); |
| 59 | const ShaderSetup& setup); | ||
| 60 | 30 | ||
| 61 | /// Generates the GLSL compute shader program source code for the given CS program | 31 | /// Generates the GLSL compute shader program source code for the given CS program |
| 62 | ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device, | 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); |
| 63 | const ShaderSetup& setup); | ||
| 64 | 33 | ||
| 65 | } // namespace OpenGL::GLShader | 34 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 9d23bcecf..37a0968a1 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -15,15 +15,15 @@ ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | |||
| 15 | : engine{nullptr}, shader_stage{shader_stage} {} | 15 | : engine{nullptr}, shader_stage{shader_stage} {} |
| 16 | 16 | ||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 18 | Tegra::Engines::ConstBufferEngineInterface* engine) | 18 | Tegra::Engines::ConstBufferEngineInterface& engine) |
| 19 | : engine{engine}, shader_stage{shader_stage} {} | 19 | : engine{&engine}, shader_stage{shader_stage} {} |
| 20 | 20 | ||
| 21 | bool ConstBufferLocker::IsEngineSet() const { | 21 | bool ConstBufferLocker::IsEngineSet() const { |
| 22 | return engine != nullptr; | 22 | return engine != nullptr; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine_) { | 25 | void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine_) { |
| 26 | engine = engine_; | 26 | engine = &engine_; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | 29 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 13eeba320..54459977f 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -21,14 +21,14 @@ public: | |||
| 21 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | 21 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); |
| 22 | 22 | ||
| 23 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | 23 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 24 | Tegra::Engines::ConstBufferEngineInterface* engine); | 24 | Tegra::Engines::ConstBufferEngineInterface& engine); |
| 25 | 25 | ||
| 26 | // Checks if an engine is setup, it may be possible that during disk shader | 26 | // Checks if an engine is setup, it may be possible that during disk shader |
| 27 | // cache run, the engines have not been created yet. | 27 | // cache run, the engines have not been created yet. |
| 28 | bool IsEngineSet() const; | 28 | bool IsEngineSet() const; |
| 29 | 29 | ||
| 30 | // Use this to set/change the engine used for this shader. | 30 | // Use this to set/change the engine used for this shader. |
| 31 | void SetEngine(Tegra::Engines::ConstBufferEngineInterface* engine); | 31 | void SetEngine(Tegra::Engines::ConstBufferEngineInterface& engine); |
| 32 | 32 | ||
| 33 | // Retrieves a key from the locker, if it's registered, it will give the | 33 | // Retrieves a key from the locker, if it's registered, it will give the |
| 34 | // registered value, if not it will obtain it from maxwell3d and register it. | 34 | // registered value, if not it will obtain it from maxwell3d and register it. |
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index d1c269ea7..6c698bcff 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -66,10 +66,11 @@ struct BlockInfo { | |||
| 66 | }; | 66 | }; |
| 67 | 67 | ||
| 68 | struct CFGRebuildState { | 68 | struct CFGRebuildState { |
| 69 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | 69 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) |
| 70 | const u32 start, ConstBufferLocker& locker) | 70 | : program_code{program_code}, start{start}, locker{locker} {} |
| 71 | : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {} | ||
| 72 | 71 | ||
| 72 | const ProgramCode& program_code; | ||
| 73 | ConstBufferLocker& locker; | ||
| 73 | u32 start{}; | 74 | u32 start{}; |
| 74 | std::vector<BlockInfo> block_info{}; | 75 | std::vector<BlockInfo> block_info{}; |
| 75 | std::list<u32> inspect_queries{}; | 76 | std::list<u32> inspect_queries{}; |
| @@ -79,10 +80,7 @@ struct CFGRebuildState { | |||
| 79 | std::map<u32, u32> ssy_labels{}; | 80 | std::map<u32, u32> ssy_labels{}; |
| 80 | std::map<u32, u32> pbk_labels{}; | 81 | std::map<u32, u32> pbk_labels{}; |
| 81 | std::unordered_map<u32, BlockStack> stacks{}; | 82 | std::unordered_map<u32, BlockStack> stacks{}; |
| 82 | const ProgramCode& program_code; | ||
| 83 | const std::size_t program_size; | ||
| 84 | ASTManager* manager; | 83 | ASTManager* manager; |
| 85 | ConstBufferLocker& locker; | ||
| 86 | }; | 84 | }; |
| 87 | 85 | ||
| 88 | enum class BlockCollision : u32 { None, Found, Inside }; | 86 | enum class BlockCollision : u32 { None, Found, Inside }; |
| @@ -242,7 +240,7 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& | |||
| 242 | 240 | ||
| 243 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | 241 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { |
| 244 | u32 offset = static_cast<u32>(address); | 242 | u32 offset = static_cast<u32>(address); |
| 245 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | 243 | const u32 end_address = static_cast<u32>(state.program_code.size()); |
| 246 | ParseInfo parse_info{}; | 244 | ParseInfo parse_info{}; |
| 247 | SingleBranch single_branch{}; | 245 | SingleBranch single_branch{}; |
| 248 | 246 | ||
| @@ -583,6 +581,7 @@ bool TryQuery(CFGRebuildState& state) { | |||
| 583 | } | 581 | } |
| 584 | return true; | 582 | return true; |
| 585 | } | 583 | } |
| 584 | |||
| 586 | } // Anonymous namespace | 585 | } // Anonymous namespace |
| 587 | 586 | ||
| 588 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | 587 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { |
| @@ -651,8 +650,7 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 651 | state.manager->Decompile(); | 650 | state.manager->Decompile(); |
| 652 | } | 651 | } |
| 653 | 652 | ||
| 654 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | 653 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 655 | std::size_t program_size, u32 start_address, | ||
| 656 | const CompilerSettings& settings, | 654 | const CompilerSettings& settings, |
| 657 | ConstBufferLocker& locker) { | 655 | ConstBufferLocker& locker) { |
| 658 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 656 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| @@ -661,7 +659,7 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | |||
| 661 | return result_out; | 659 | return result_out; |
| 662 | } | 660 | } |
| 663 | 661 | ||
| 664 | CFGRebuildState state{program_code, program_size, start_address, locker}; | 662 | CFGRebuildState state{program_code, start_address, locker}; |
| 665 | // Inspect Code and generate blocks | 663 | // Inspect Code and generate blocks |
| 666 | state.labels.clear(); | 664 | state.labels.clear(); |
| 667 | state.labels.emplace(start_address); | 665 | state.labels.emplace(start_address); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 369ca255b..288ee68af 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -105,8 +105,7 @@ struct ShaderCharacteristics { | |||
| 105 | CompilerSettings settings{}; | 105 | CompilerSettings settings{}; |
| 106 | }; | 106 | }; |
| 107 | 107 | ||
| 108 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | 108 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 109 | std::size_t program_size, u32 start_address, | ||
| 110 | const CompilerSettings& settings, | 109 | const CompilerSettings& settings, |
| 111 | ConstBufferLocker& locker); | 110 | ConstBufferLocker& locker); |
| 112 | 111 | ||
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 053241128..e1afa4582 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 33 | return (absolute_offset % SchedPeriod) == 0; | 33 | return (absolute_offset % SchedPeriod) == 0; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | } // namespace | 36 | } // Anonymous namespace |
| 37 | 37 | ||
| 38 | class ASTDecoder { | 38 | class ASTDecoder { |
| 39 | public: | 39 | public: |
| @@ -102,7 +102,7 @@ void ShaderIR::Decode() { | |||
| 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 103 | 103 | ||
| 104 | decompiled = false; | 104 | decompiled = false; |
| 105 | auto info = ScanFlow(program_code, program_size, main_offset, settings, locker); | 105 | auto info = ScanFlow(program_code, main_offset, settings, locker); |
| 106 | auto& shader_info = *info; | 106 | auto& shader_info = *info; |
| 107 | coverage_begin = shader_info.start; | 107 | coverage_begin = shader_info.start; |
| 108 | coverage_end = shader_info.end; | 108 | coverage_end = shader_info.end; |
| @@ -155,7 +155,7 @@ void ShaderIR::Decode() { | |||
| 155 | [[fallthrough]]; | 155 | [[fallthrough]]; |
| 156 | case CompileDepth::BruteForce: { | 156 | case CompileDepth::BruteForce: { |
| 157 | coverage_begin = main_offset; | 157 | coverage_begin = main_offset; |
| 158 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | 158 | const u32 shader_end = program_code.size(); |
| 159 | coverage_end = shader_end; | 159 | coverage_end = shader_end; |
| 160 | for (u32 label = main_offset; label < shader_end; label++) { | 160 | for (u32 label = main_offset; label < shader_end; label++) { |
| 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); |
| @@ -225,7 +225,8 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | |||
| 225 | for (auto& branch_case : multi_branch->branches) { | 225 | for (auto& branch_case : multi_branch->branches) { |
| 226 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | 226 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); |
| 227 | Node op_b = Immediate(branch_case.cmp_value); | 227 | Node op_b = Immediate(branch_case.cmp_value); |
| 228 | Node condition = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); | 228 | Node condition = |
| 229 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); | ||
| 229 | auto result = Conditional(condition, {n}); | 230 | auto result = Conditional(condition, {n}); |
| 230 | bb.push_back(result); | 231 | bb.push_back(result); |
| 231 | global_code.push_back(result); | 232 | global_code.push_back(result); |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 6430575ec..1d718ccc6 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -22,10 +22,9 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 22 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 23 | using Tegra::Shader::Register; |
| 24 | 24 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, | 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 26 | CompilerSettings settings, ConstBufferLocker& locker) | 26 | ConstBufferLocker& locker) |
| 27 | : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, | 27 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |
| 28 | program_manager{true, true}, settings{settings}, locker{locker} { | ||
| 29 | Decode(); | 28 | Decode(); |
| 30 | } | 29 | } |
| 31 | 30 | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 3a3e381d2..3ebea91b9 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -67,8 +67,8 @@ struct GlobalMemoryUsage { | |||
| 67 | 67 | ||
| 68 | class ShaderIR final { | 68 | class ShaderIR final { |
| 69 | public: | 69 | public: |
| 70 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, | 70 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 71 | CompilerSettings settings, ConstBufferLocker& locker); | 71 | ConstBufferLocker& locker); |
| 72 | ~ShaderIR(); | 72 | ~ShaderIR(); |
| 73 | 73 | ||
| 74 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 74 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -384,7 +384,9 @@ private: | |||
| 384 | 384 | ||
| 385 | const ProgramCode& program_code; | 385 | const ProgramCode& program_code; |
| 386 | const u32 main_offset; | 386 | const u32 main_offset; |
| 387 | const std::size_t program_size; | 387 | const CompilerSettings settings; |
| 388 | ConstBufferLocker& locker; | ||
| 389 | |||
| 388 | bool decompiled{}; | 390 | bool decompiled{}; |
| 389 | bool disable_flow_stack{}; | 391 | bool disable_flow_stack{}; |
| 390 | 392 | ||
| @@ -393,9 +395,7 @@ private: | |||
| 393 | 395 | ||
| 394 | std::map<u32, NodeBlock> basic_blocks; | 396 | std::map<u32, NodeBlock> basic_blocks; |
| 395 | NodeBlock global_code; | 397 | NodeBlock global_code; |
| 396 | ASTManager program_manager; | 398 | ASTManager program_manager{true, true}; |
| 397 | CompilerSettings settings{}; | ||
| 398 | ConstBufferLocker& locker; | ||
| 399 | 399 | ||
| 400 | std::set<u32> used_registers; | 400 | std::set<u32> used_registers; |
| 401 | std::set<Tegra::Shader::Pred> used_predicates; | 401 | std::set<Tegra::Shader::Pred> used_predicates; |