diff options
| author | 2019-01-14 00:58:15 -0300 | |
|---|---|---|
| committer | 2019-02-06 22:20:57 -0300 | |
| commit | a3703f5767332dfc5f7e8d37a1f715d8ccb76fcf (patch) | |
| tree | 84ffe77dabc36e77dc0fb543d96d8774483c67a0 | |
| parent | gl_shader_disk_cache: Add transferable cache invalidation (diff) | |
| download | yuzu-a3703f5767332dfc5f7e8d37a1f715d8ccb76fcf.tar.gz yuzu-a3703f5767332dfc5f7e8d37a1f715d8ccb76fcf.tar.xz yuzu-a3703f5767332dfc5f7e8d37a1f715d8ccb76fcf.zip | |
gl_shader_cache: Refactor to support disk shader cache
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 450 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 59 |
2 files changed, 388 insertions, 121 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 363b941f3..a70ff79d0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 14 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 15 | #include "video_core/renderer_opengl/utils.h" | 16 | #include "video_core/renderer_opengl/utils.h" |
| 16 | #include "video_core/shader/shader_ir.h" | 17 | #include "video_core/shader/shader_ir.h" |
| @@ -19,8 +20,19 @@ namespace OpenGL { | |||
| 19 | 20 | ||
| 20 | using VideoCommon::Shader::ProgramCode; | 21 | using VideoCommon::Shader::ProgramCode; |
| 21 | 22 | ||
| 23 | // One UBO is always reserved for emulation values | ||
| 24 | constexpr u32 RESERVED_UBOS = 1; | ||
| 25 | |||
| 26 | struct UnspecializedShader { | ||
| 27 | std::string code; | ||
| 28 | GLShader::ShaderEntries entries; | ||
| 29 | Maxwell::ShaderProgram program_type; | ||
| 30 | }; | ||
| 31 | |||
| 32 | namespace { | ||
| 33 | |||
| 22 | /// Gets the address for the specified shader stage program | 34 | /// Gets the address for the specified shader stage program |
| 23 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 35 | VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 24 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 36 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 25 | const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; | 37 | const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; |
| 26 | const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + | 38 | const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + |
| @@ -30,7 +42,7 @@ static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 30 | } | 42 | } |
| 31 | 43 | ||
| 32 | /// Gets the shader program code from memory for the specified address | 44 | /// Gets the shader program code from memory for the specified address |
| 33 | static ProgramCode GetShaderCode(VAddr addr) { | 45 | ProgramCode GetShaderCode(VAddr addr) { |
| 34 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | 46 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 35 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); | 47 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 36 | return program_code; | 48 | return program_code; |
| @@ -51,38 +63,193 @@ constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 51 | } | 63 | } |
| 52 | } | 64 | } |
| 53 | 65 | ||
| 54 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | 66 | /// Gets if the current instruction offset is a scheduler instruction |
| 55 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | 67 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { |
| 68 | // Sched instructions appear once every 4 instructions. | ||
| 69 | constexpr std::size_t SchedPeriod = 4; | ||
| 70 | const std::size_t absolute_offset = offset - main_offset; | ||
| 71 | return (absolute_offset % SchedPeriod) == 0; | ||
| 72 | } | ||
| 56 | 73 | ||
| 57 | GLShader::ProgramResult program_result; | 74 | /// Describes primitive behavior on geometry shaders |
| 75 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||
| 76 | switch (primitive_mode) { | ||
| 77 | case GL_POINTS: | ||
| 78 | return {"points", "Points", 1}; | ||
| 79 | case GL_LINES: | ||
| 80 | case GL_LINE_STRIP: | ||
| 81 | return {"lines", "Lines", 2}; | ||
| 82 | case GL_LINES_ADJACENCY: | ||
| 83 | case GL_LINE_STRIP_ADJACENCY: | ||
| 84 | return {"lines_adjacency", "LinesAdj", 4}; | ||
| 85 | case GL_TRIANGLES: | ||
| 86 | case GL_TRIANGLE_STRIP: | ||
| 87 | case GL_TRIANGLE_FAN: | ||
| 88 | return {"triangles", "Triangles", 3}; | ||
| 89 | case GL_TRIANGLES_ADJACENCY: | ||
| 90 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 91 | return {"triangles_adjacency", "TrianglesAdj", 6}; | ||
| 92 | default: | ||
| 93 | return {"points", "Invalid", 1}; | ||
| 94 | } | ||
| 95 | } | ||
| 58 | 96 | ||
| 59 | switch (program_type) { | 97 | /// Calculates the size of a program stream |
| 60 | case Maxwell::ShaderProgram::VertexA: | 98 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 99 | constexpr std::size_t start_offset = 10; | ||
| 100 | std::size_t offset = start_offset; | ||
| 101 | std::size_t size = start_offset * sizeof(u64); | ||
| 102 | while (offset < program.size()) { | ||
| 103 | const u64 instruction = program[offset]; | ||
| 104 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 105 | if (instruction == 0 || (instruction >> 52) == 0x50b) { | ||
| 106 | // End on Maxwell's "nop" instruction | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | size += sizeof(u64); | ||
| 111 | offset++; | ||
| 112 | } | ||
| 113 | // The last instruction is included in the program size | ||
| 114 | return std::min(size + sizeof(u64), program.size() * sizeof(u64)); | ||
| 115 | } | ||
| 116 | |||
| 117 | /// Hashes one (or two) program streams | ||
| 118 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | ||
| 119 | const ProgramCode& code_b) { | ||
| 120 | u64 unique_identifier = | ||
| 121 | Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); | ||
| 122 | if (program_type != Maxwell::ShaderProgram::VertexA) { | ||
| 123 | return unique_identifier; | ||
| 124 | } | ||
| 125 | // VertexA programs include two programs | ||
| 126 | |||
| 127 | std::size_t seed = 0; | ||
| 128 | boost::hash_combine(seed, unique_identifier); | ||
| 129 | |||
| 130 | const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), | ||
| 131 | CalculateProgramSize(code_b)); | ||
| 132 | boost::hash_combine(seed, identifier_b); | ||
| 133 | return static_cast<u64>(seed); | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Creates an unspecialized program from code streams | ||
| 137 | GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code, | ||
| 138 | ProgramCode program_code_b) { | ||
| 139 | GLShader::ShaderSetup setup(std::move(program_code)); | ||
| 140 | if (program_type == Maxwell::ShaderProgram::VertexA) { | ||
| 61 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 141 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 62 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 142 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 63 | // stage here. | 143 | // stage here. |
| 64 | setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | 144 | setup.SetProgramB(std::move(program_code_b)); |
| 145 | } | ||
| 146 | |||
| 147 | switch (program_type) { | ||
| 148 | case Maxwell::ShaderProgram::VertexA: | ||
| 65 | case Maxwell::ShaderProgram::VertexB: | 149 | case Maxwell::ShaderProgram::VertexB: |
| 66 | CalculateProperties(); | 150 | return GLShader::GenerateVertexShader(setup); |
| 67 | program_result = GLShader::GenerateVertexShader(setup); | ||
| 68 | break; | ||
| 69 | case Maxwell::ShaderProgram::Geometry: | 151 | case Maxwell::ShaderProgram::Geometry: |
| 70 | CalculateProperties(); | 152 | return GLShader::GenerateGeometryShader(setup); |
| 71 | program_result = GLShader::GenerateGeometryShader(setup); | ||
| 72 | break; | ||
| 73 | case Maxwell::ShaderProgram::Fragment: | 153 | case Maxwell::ShaderProgram::Fragment: |
| 74 | CalculateProperties(); | 154 | return GLShader::GenerateFragmentShader(setup); |
| 75 | program_result = GLShader::GenerateFragmentShader(setup); | ||
| 76 | break; | ||
| 77 | default: | 155 | default: |
| 78 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 156 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 79 | UNREACHABLE(); | 157 | UNREACHABLE(); |
| 158 | return {}; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | |||
| 162 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | ||
| 163 | Maxwell::ShaderProgram program_type, BaseBindings base_bindings, | ||
| 164 | GLenum primitive_mode, bool hint_retrievable = false) { | ||
| 165 | std::string source = "#version 430 core\n"; | ||
| 166 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 167 | |||
| 168 | for (const auto& cbuf : entries.const_buffers) { | ||
| 169 | source += | ||
| 170 | fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); | ||
| 171 | } | ||
| 172 | for (const auto& gmem : entries.global_memory_entries) { | ||
| 173 | source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), | ||
| 174 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 175 | } | ||
| 176 | for (const auto& sampler : entries.samplers) { | ||
| 177 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | ||
| 178 | base_bindings.sampler++); | ||
| 179 | } | ||
| 180 | |||
| 181 | if (program_type == Maxwell::ShaderProgram::Geometry) { | ||
| 182 | const auto [glsl_topology, _, max_vertices] = GetPrimitiveDescription(primitive_mode); | ||
| 183 | |||
| 184 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | ||
| 185 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | ||
| 186 | } | ||
| 187 | |||
| 188 | source += code; | ||
| 189 | |||
| 190 | OGLShader shader; | ||
| 191 | shader.Create(source.c_str(), GetShaderType(program_type)); | ||
| 192 | |||
| 193 | auto program = std::make_shared<OGLProgram>(); | ||
| 194 | program->Create(true, hint_retrievable, shader.handle); | ||
| 195 | return program; | ||
| 196 | } | ||
| 197 | |||
| 198 | std::set<GLenum> GetSupportedFormats() { | ||
| 199 | std::set<GLenum> supported_formats; | ||
| 200 | |||
| 201 | GLint num_formats{}; | ||
| 202 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||
| 203 | |||
| 204 | std::vector<GLint> formats(num_formats); | ||
| 205 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||
| 206 | |||
| 207 | for (const GLint format : formats) | ||
| 208 | supported_formats.insert(static_cast<GLenum>(format)); | ||
| 209 | return supported_formats; | ||
| 210 | } | ||
| 211 | |||
| 212 | } // namespace | ||
| 213 | |||
| 214 | CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | ||
| 215 | ShaderDiskCacheOpenGL& disk_cache, | ||
| 216 | const PrecompiledPrograms& precompiled_programs, | ||
| 217 | ProgramCode&& program_code, ProgramCode&& program_code_b) | ||
| 218 | : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, | ||
| 219 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { | ||
| 220 | |||
| 221 | const std::size_t code_size = CalculateProgramSize(program_code); | ||
| 222 | const std::size_t code_size_b = | ||
| 223 | program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b); | ||
| 224 | |||
| 225 | GLShader::ProgramResult program_result = | ||
| 226 | CreateProgram(program_type, program_code, program_code_b); | ||
| 227 | if (program_result.first.empty()) { | ||
| 228 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | ||
| 80 | return; | 229 | return; |
| 81 | } | 230 | } |
| 82 | 231 | ||
| 83 | code = program_result.first; | 232 | code = program_result.first; |
| 84 | entries = program_result.second; | 233 | entries = program_result.second; |
| 85 | shader_length = entries.shader_length; | 234 | shader_length = entries.shader_length; |
| 235 | |||
| 236 | const ShaderDiskCacheRaw raw(unique_identifier, program_type, | ||
| 237 | static_cast<u32>(code_size / sizeof(u64)), | ||
| 238 | static_cast<u32>(code_size_b / sizeof(u64)), | ||
| 239 | std::move(program_code), std::move(program_code_b)); | ||
| 240 | disk_cache.SaveRaw(raw); | ||
| 241 | } | ||
| 242 | |||
| 243 | CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | ||
| 244 | ShaderDiskCacheOpenGL& disk_cache, | ||
| 245 | const PrecompiledPrograms& precompiled_programs, | ||
| 246 | GLShader::ProgramResult result) | ||
| 247 | : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, | ||
| 248 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { | ||
| 249 | |||
| 250 | code = std::move(result.first); | ||
| 251 | entries = result.second; | ||
| 252 | shader_length = entries.shader_length; | ||
| 86 | } | 253 | } |
| 87 | 254 | ||
| 88 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, | 255 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, |
| @@ -94,138 +261,195 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | |||
| 94 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); | 261 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); |
| 95 | auto& program = entry->second; | 262 | auto& program = entry->second; |
| 96 | if (is_cache_miss) { | 263 | if (is_cache_miss) { |
| 97 | std::string source = AllocateBindings(base_bindings); | 264 | program = TryLoadProgram(primitive_mode, base_bindings); |
| 98 | source += code; | 265 | if (!program) { |
| 266 | program = | ||
| 267 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | ||
| 268 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 269 | } | ||
| 99 | 270 | ||
| 100 | OGLShader shader; | 271 | LabelGLObject(GL_PROGRAM, program->handle, addr); |
| 101 | shader.Create(source.c_str(), GetShaderType(program_type)); | ||
| 102 | program.Create(true, shader.handle); | ||
| 103 | LabelGLObject(GL_PROGRAM, program.handle, addr); | ||
| 104 | } | 272 | } |
| 105 | 273 | ||
| 106 | handle = program.handle; | 274 | handle = program->handle; |
| 107 | } | 275 | } |
| 108 | 276 | ||
| 109 | // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for | 277 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; |
| 110 | // emulation values | ||
| 111 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1; | ||
| 112 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 278 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 113 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 279 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 114 | 280 | ||
| 115 | return {handle, base_bindings}; | 281 | return {handle, base_bindings}; |
| 116 | } | 282 | } |
| 117 | 283 | ||
| 118 | std::string CachedShader::AllocateBindings(BaseBindings base_bindings) { | ||
| 119 | std::string code = "#version 430 core\n"; | ||
| 120 | code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 121 | |||
| 122 | for (const auto& cbuf : entries.const_buffers) { | ||
| 123 | code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); | ||
| 124 | } | ||
| 125 | |||
| 126 | for (const auto& gmem : entries.global_memory_entries) { | ||
| 127 | code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), | ||
| 128 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 129 | } | ||
| 130 | |||
| 131 | for (const auto& sampler : entries.samplers) { | ||
| 132 | code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | ||
| 133 | base_bindings.sampler++); | ||
| 134 | } | ||
| 135 | |||
| 136 | return code; | ||
| 137 | } | ||
| 138 | |||
| 139 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | 284 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { |
| 140 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); | 285 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); |
| 141 | auto& programs = entry->second; | 286 | auto& programs = entry->second; |
| 142 | 287 | ||
| 143 | switch (primitive_mode) { | 288 | switch (primitive_mode) { |
| 144 | case GL_POINTS: | 289 | case GL_POINTS: |
| 145 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | 290 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); |
| 146 | case GL_LINES: | 291 | case GL_LINES: |
| 147 | case GL_LINE_STRIP: | 292 | case GL_LINE_STRIP: |
| 148 | return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines"); | 293 | return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode); |
| 149 | case GL_LINES_ADJACENCY: | 294 | case GL_LINES_ADJACENCY: |
| 150 | case GL_LINE_STRIP_ADJACENCY: | 295 | case GL_LINE_STRIP_ADJACENCY: |
| 151 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4, | 296 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode); |
| 152 | "ShaderLinesAdjacency"); | ||
| 153 | case GL_TRIANGLES: | 297 | case GL_TRIANGLES: |
| 154 | case GL_TRIANGLE_STRIP: | 298 | case GL_TRIANGLE_STRIP: |
| 155 | case GL_TRIANGLE_FAN: | 299 | case GL_TRIANGLE_FAN: |
| 156 | return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3, | 300 | return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode); |
| 157 | "ShaderTriangles"); | ||
| 158 | case GL_TRIANGLES_ADJACENCY: | 301 | case GL_TRIANGLES_ADJACENCY: |
| 159 | case GL_TRIANGLE_STRIP_ADJACENCY: | 302 | case GL_TRIANGLE_STRIP_ADJACENCY: |
| 160 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, | 303 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode); |
| 161 | "triangles_adjacency", 6, "ShaderTrianglesAdjacency"); | ||
| 162 | default: | 304 | default: |
| 163 | UNREACHABLE_MSG("Unknown primitive mode."); | 305 | UNREACHABLE_MSG("Unknown primitive mode."); |
| 164 | return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); | 306 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); |
| 165 | } | 307 | } |
| 166 | } | 308 | } |
| 167 | 309 | ||
| 168 | GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, | 310 | GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, |
| 169 | const std::string& glsl_topology, u32 max_vertices, | 311 | GLenum primitive_mode) { |
| 170 | const std::string& debug_name) { | 312 | if (target_program) { |
| 171 | if (target_program.handle != 0) { | 313 | return target_program->handle; |
| 172 | return target_program.handle; | 314 | } |
| 315 | const auto [_, debug_name, __] = GetPrimitiveDescription(primitive_mode); | ||
| 316 | target_program = TryLoadProgram(primitive_mode, base_bindings); | ||
| 317 | if (!target_program) { | ||
| 318 | target_program = | ||
| 319 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | ||
| 320 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 173 | } | 321 | } |
| 174 | std::string source = AllocateBindings(base_bindings); | ||
| 175 | source += "layout (" + glsl_topology + ") in;\n"; | ||
| 176 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | ||
| 177 | source += code; | ||
| 178 | 322 | ||
| 179 | OGLShader shader; | 323 | LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); |
| 180 | shader.Create(source.c_str(), GL_GEOMETRY_SHADER); | 324 | |
| 181 | target_program.Create(true, shader.handle); | 325 | return target_program->handle; |
| 182 | LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); | ||
| 183 | return target_program.handle; | ||
| 184 | }; | 326 | }; |
| 185 | 327 | ||
| 186 | static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | 328 | CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode, |
| 187 | // sched instructions appear once every 4 instructions. | 329 | BaseBindings base_bindings) const { |
| 188 | static constexpr std::size_t SchedPeriod = 4; | 330 | const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings)); |
| 189 | const std::size_t absolute_offset = offset - main_offset; | 331 | if (found == precompiled_programs.end()) { |
| 190 | return (absolute_offset % SchedPeriod) == 0; | 332 | return {}; |
| 333 | } | ||
| 334 | return found->second; | ||
| 191 | } | 335 | } |
| 192 | 336 | ||
| 193 | static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 337 | ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, |
| 194 | constexpr std::size_t start_offset = 10; | 338 | BaseBindings base_bindings) const { |
| 195 | std::size_t offset = start_offset; | 339 | return {unique_identifier, base_bindings, primitive_mode}; |
| 196 | std::size_t size = start_offset * sizeof(u64); | 340 | } |
| 197 | while (offset < program.size()) { | 341 | |
| 198 | const u64 inst = program[offset]; | 342 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} |
| 199 | if (!IsSchedInstruction(offset, start_offset)) { | 343 | |
| 200 | if (inst == 0 || (inst >> 52) == 0x50b) { | 344 | void ShaderCacheOpenGL::LoadDiskCache() { |
| 201 | break; | 345 | std::vector<ShaderDiskCacheRaw> raws; |
| 202 | } | 346 | std::vector<ShaderDiskCacheUsage> usages; |
| 347 | if (!disk_cache.LoadTransferable(raws, usages)) { | ||
| 348 | return; | ||
| 349 | } | ||
| 350 | |||
| 351 | std::vector<ShaderDiskCachePrecompiledEntry> precompiled = disk_cache.LoadPrecompiled(); | ||
| 352 | const auto SearchPrecompiled = [&precompiled](const ShaderDiskCacheUsage& usage) { | ||
| 353 | return std::find_if( | ||
| 354 | precompiled.begin(), precompiled.end(), | ||
| 355 | [&usage](const auto& precompiled_entry) { return precompiled_entry.usage == usage; }); | ||
| 356 | }; | ||
| 357 | |||
| 358 | const std::set<GLenum> supported_formats{GetSupportedFormats()}; | ||
| 359 | const auto unspecialized{GenerateUnspecializedShaders(raws)}; | ||
| 360 | |||
| 361 | // Build shaders | ||
| 362 | for (std::size_t i = 0; i < usages.size(); ++i) { | ||
| 363 | const auto& usage{usages[i]}; | ||
| 364 | LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, | ||
| 365 | i + 1, usages.size()); | ||
| 366 | |||
| 367 | const auto& unspec{unspecialized.at(usage.unique_identifier)}; | ||
| 368 | |||
| 369 | const auto precompiled_it = SearchPrecompiled(usage); | ||
| 370 | const bool is_precompiled = precompiled_it != precompiled.end(); | ||
| 371 | |||
| 372 | CachedProgram shader; | ||
| 373 | if (is_precompiled) { | ||
| 374 | shader = GeneratePrecompiledProgram(precompiled, *precompiled_it, supported_formats); | ||
| 375 | } | ||
| 376 | if (!shader) { | ||
| 377 | shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, | ||
| 378 | usage.bindings, usage.primitive, true); | ||
| 379 | } | ||
| 380 | precompiled_programs.insert({usage, std::move(shader)}); | ||
| 381 | } | ||
| 382 | |||
| 383 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before | ||
| 384 | // precompiling them | ||
| 385 | |||
| 386 | for (std::size_t i = 0; i < usages.size(); ++i) { | ||
| 387 | const auto& usage{usages[i]}; | ||
| 388 | if (SearchPrecompiled(usage) == precompiled.end()) { | ||
| 389 | const auto& program = precompiled_programs.at(usage); | ||
| 390 | disk_cache.SavePrecompiled(usage, program->handle); | ||
| 203 | } | 391 | } |
| 204 | size += sizeof(inst); | ||
| 205 | offset++; | ||
| 206 | } | 392 | } |
| 207 | return size; | ||
| 208 | } | 393 | } |
| 209 | 394 | ||
| 210 | void CachedShader::CalculateProperties() { | 395 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 211 | setup.program.real_size = CalculateProgramSize(setup.program.code); | 396 | std::vector<ShaderDiskCachePrecompiledEntry>& precompiled, |
| 212 | setup.program.real_size_b = 0; | 397 | const ShaderDiskCachePrecompiledEntry& precompiled_entry, |
| 213 | setup.program.unique_identifier = Common::CityHash64( | 398 | const std::set<GLenum>& supported_formats) { |
| 214 | reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size); | 399 | |
| 215 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 400 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| 216 | std::size_t seed = 0; | 401 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); |
| 217 | boost::hash_combine(seed, setup.program.unique_identifier); | 402 | disk_cache.InvalidatePrecompiled(); |
| 218 | setup.program.real_size_b = CalculateProgramSize(setup.program.code_b); | 403 | precompiled.clear(); |
| 219 | const u64 identifier_b = Common::CityHash64( | 404 | return {}; |
| 220 | reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b); | ||
| 221 | boost::hash_combine(seed, identifier_b); | ||
| 222 | setup.program.unique_identifier = static_cast<u64>(seed); | ||
| 223 | } | 405 | } |
| 406 | |||
| 407 | CachedProgram shader = std::make_shared<OGLProgram>(); | ||
| 408 | shader->handle = glCreateProgram(); | ||
| 409 | glProgramBinary(shader->handle, precompiled_entry.binary_format, | ||
| 410 | precompiled_entry.binary.data(), | ||
| 411 | static_cast<GLsizei>(precompiled_entry.binary.size())); | ||
| 412 | |||
| 413 | GLint link_status{}; | ||
| 414 | glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); | ||
| 415 | if (link_status == GL_FALSE) { | ||
| 416 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); | ||
| 417 | disk_cache.InvalidatePrecompiled(); | ||
| 418 | precompiled.clear(); | ||
| 419 | |||
| 420 | shader.reset(); | ||
| 421 | } | ||
| 422 | |||
| 423 | return shader; | ||
| 224 | } | 424 | } |
| 225 | 425 | ||
| 226 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} | 426 | std::map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( |
| 427 | const std::vector<ShaderDiskCacheRaw>& raws) { | ||
| 428 | |||
| 429 | std::map<u64, UnspecializedShader> unspecialized; | ||
| 430 | for (const auto& raw : raws) { | ||
| 431 | const u64 calculated_hash = | ||
| 432 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); | ||
| 433 | if (raw.GetUniqueIdentifier() != calculated_hash) { | ||
| 434 | LOG_ERROR( | ||
| 435 | Render_OpenGL, | ||
| 436 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", | ||
| 437 | raw.GetUniqueIdentifier(), calculated_hash); | ||
| 438 | disk_cache.InvalidateTransferable(); | ||
| 439 | return {}; | ||
| 440 | } | ||
| 441 | |||
| 442 | auto result = | ||
| 443 | CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB()); | ||
| 444 | |||
| 445 | precompiled_shaders.insert({raw.GetUniqueIdentifier(), result}); | ||
| 227 | 446 | ||
| 228 | void ShaderCacheOpenGL::LoadDiskCache() {} | 447 | unspecialized.insert( |
| 448 | {raw.GetUniqueIdentifier(), | ||
| 449 | {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); | ||
| 450 | } | ||
| 451 | return unspecialized; | ||
| 452 | } | ||
| 229 | 453 | ||
| 230 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 454 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 231 | if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { | 455 | if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { |
| @@ -239,7 +463,23 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 239 | 463 | ||
| 240 | if (!shader) { | 464 | if (!shader) { |
| 241 | // No shader found - create a new one | 465 | // No shader found - create a new one |
| 242 | shader = std::make_shared<CachedShader>(program_addr, program); | 466 | ProgramCode program_code = GetShaderCode(program_addr); |
| 467 | ProgramCode program_code_b; | ||
| 468 | if (program == Maxwell::ShaderProgram::VertexA) { | ||
| 469 | program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); | ||
| 470 | } | ||
| 471 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | ||
| 472 | |||
| 473 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 474 | if (found != precompiled_shaders.end()) { | ||
| 475 | shader = | ||
| 476 | std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, | ||
| 477 | precompiled_programs, found->second); | ||
| 478 | } else { | ||
| 479 | shader = std::make_shared<CachedShader>( | ||
| 480 | program_addr, unique_identifier, program, disk_cache, precompiled_programs, | ||
| 481 | std::move(program_code), std::move(program_code_b)); | ||
| 482 | } | ||
| 243 | Register(shader); | 483 | Register(shader); |
| 244 | } | 484 | } |
| 245 | 485 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 18fb80bcc..763a47bce 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <map> | 8 | #include <map> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <set> | ||
| 10 | #include <tuple> | 11 | #include <tuple> |
| 11 | 12 | ||
| 12 | #include <glad/glad.h> | 13 | #include <glad/glad.h> |
| @@ -23,13 +24,25 @@ namespace OpenGL { | |||
| 23 | 24 | ||
| 24 | class CachedShader; | 25 | class CachedShader; |
| 25 | class RasterizerOpenGL; | 26 | class RasterizerOpenGL; |
| 27 | struct UnspecializedShader; | ||
| 26 | 28 | ||
| 27 | using Shader = std::shared_ptr<CachedShader>; | 29 | using Shader = std::shared_ptr<CachedShader>; |
| 30 | using CachedProgram = std::shared_ptr<OGLProgram>; | ||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 31 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 32 | using PrecompiledPrograms = std::map<ShaderDiskCacheUsage, CachedProgram>; | ||
| 33 | using PrecompiledShaders = std::map<u64, GLShader::ProgramResult>; | ||
| 29 | 34 | ||
| 30 | class CachedShader final : public RasterizerCacheObject { | 35 | class CachedShader final : public RasterizerCacheObject { |
| 31 | public: | 36 | public: |
| 32 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); | 37 | explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, |
| 38 | ShaderDiskCacheOpenGL& disk_cache, | ||
| 39 | const PrecompiledPrograms& precompiled_programs, | ||
| 40 | ProgramCode&& program_code, ProgramCode&& program_code_b); | ||
| 41 | |||
| 42 | explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | ||
| 43 | ShaderDiskCacheOpenGL& disk_cache, | ||
| 44 | const PrecompiledPrograms& precompiled_programs, | ||
| 45 | GLShader::ProgramResult result); | ||
| 33 | 46 | ||
| 34 | VAddr GetAddr() const override { | 47 | VAddr GetAddr() const override { |
| 35 | return addr; | 48 | return addr; |
| @@ -56,33 +69,35 @@ private: | |||
| 56 | // declared by the hardware. Work around this issue by generating a different shader per input | 69 | // declared by the hardware. Work around this issue by generating a different shader per input |
| 57 | // topology class. | 70 | // topology class. |
| 58 | struct GeometryPrograms { | 71 | struct GeometryPrograms { |
| 59 | OGLProgram points; | 72 | CachedProgram points; |
| 60 | OGLProgram lines; | 73 | CachedProgram lines; |
| 61 | OGLProgram lines_adjacency; | 74 | CachedProgram lines_adjacency; |
| 62 | OGLProgram triangles; | 75 | CachedProgram triangles; |
| 63 | OGLProgram triangles_adjacency; | 76 | CachedProgram triangles_adjacency; |
| 64 | }; | 77 | }; |
| 65 | 78 | ||
| 66 | std::string AllocateBindings(BaseBindings base_bindings); | ||
| 67 | |||
| 68 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | 79 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); |
| 69 | 80 | ||
| 70 | /// Generates a geometry shader or returns one that already exists. | 81 | /// Generates a geometry shader or returns one that already exists. |
| 71 | GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, | 82 | GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, |
| 72 | const std::string& glsl_topology, u32 max_vertices, | 83 | GLenum primitive_mode); |
| 73 | const std::string& debug_name); | 84 | |
| 85 | CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const; | ||
| 86 | |||
| 87 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | ||
| 74 | 88 | ||
| 75 | void CalculateProperties(); | 89 | const VAddr addr; |
| 90 | const u64 unique_identifier; | ||
| 91 | const Maxwell::ShaderProgram program_type; | ||
| 92 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 93 | const PrecompiledPrograms& precompiled_programs; | ||
| 76 | 94 | ||
| 77 | VAddr addr{}; | ||
| 78 | std::size_t shader_length{}; | 95 | std::size_t shader_length{}; |
| 79 | Maxwell::ShaderProgram program_type{}; | ||
| 80 | GLShader::ShaderSetup setup; | ||
| 81 | GLShader::ShaderEntries entries; | 96 | GLShader::ShaderEntries entries; |
| 82 | 97 | ||
| 83 | std::string code; | 98 | std::string code; |
| 84 | 99 | ||
| 85 | std::map<BaseBindings, OGLProgram> programs; | 100 | std::map<BaseBindings, CachedProgram> programs; |
| 86 | std::map<BaseBindings, GeometryPrograms> geometry_programs; | 101 | std::map<BaseBindings, GeometryPrograms> geometry_programs; |
| 87 | 102 | ||
| 88 | std::map<u32, GLuint> cbuf_resource_cache; | 103 | std::map<u32, GLuint> cbuf_resource_cache; |
| @@ -101,7 +116,19 @@ public: | |||
| 101 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 116 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 102 | 117 | ||
| 103 | private: | 118 | private: |
| 119 | std::map<u64, UnspecializedShader> GenerateUnspecializedShaders( | ||
| 120 | const std::vector<ShaderDiskCacheRaw>& raws); | ||
| 121 | |||
| 122 | CachedProgram GeneratePrecompiledProgram( | ||
| 123 | std::vector<ShaderDiskCachePrecompiledEntry>& precompiled, | ||
| 124 | const ShaderDiskCachePrecompiledEntry& precompiled_entry, | ||
| 125 | const std::set<GLenum>& supported_formats); | ||
| 126 | |||
| 104 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 127 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 128 | |||
| 129 | ShaderDiskCacheOpenGL disk_cache; | ||
| 130 | PrecompiledShaders precompiled_shaders; | ||
| 131 | PrecompiledPrograms precompiled_programs; | ||
| 105 | }; | 132 | }; |
| 106 | 133 | ||
| 107 | } // namespace OpenGL | 134 | } // namespace OpenGL |