diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 84 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/shader/registry.cpp | 59 | ||||
| -rw-r--r-- | src/video_core/shader/registry.h | 49 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 9 |
8 files changed, 176 insertions, 75 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 87d25b5a5..72a5dc82a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | |||
| 166 | 166 | ||
| 167 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | 167 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { |
| 168 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; | 168 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |
| 169 | auto registry = std::make_shared<Registry>(entry.type, guest_profile); | 169 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, |
| 170 | registry->SetBoundBuffer(entry.bound_buffer); | 170 | entry.graphics_info, entry.compute_info}; |
| 171 | const auto registry = std::make_shared<Registry>(entry.type, info); | ||
| 171 | for (const auto& [address, value] : entry.keys) { | 172 | for (const auto& [address, value] : entry.keys) { |
| 172 | const auto [buffer, offset] = address; | 173 | const auto [buffer, offset] = address; |
| 173 | registry->InsertKey(buffer, offset, value); | 174 | registry->InsertKey(buffer, offset, value); |
| @@ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | |||
| 184 | 185 | ||
| 185 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | 186 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, |
| 186 | u64 unique_identifier, const ShaderIR& ir, | 187 | u64 unique_identifier, const ShaderIR& ir, |
| 187 | bool hint_retrievable = false) { | 188 | const Registry& registry, bool hint_retrievable = false) { |
| 188 | LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); | 189 | LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); |
| 189 | const std::string glsl = DecompileShader(device, ir, shader_type); | 190 | const std::string glsl = DecompileShader(device, ir, registry, shader_type); |
| 190 | OGLShader shader; | 191 | OGLShader shader; |
| 191 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | 192 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |
| 192 | 193 | ||
| @@ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 239 | // if (!code_b.empty()) { | 240 | // if (!code_b.empty()) { |
| 240 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 241 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 241 | // } | 242 | // } |
| 242 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); | 243 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |
| 243 | 244 | ||
| 244 | ShaderDiskCacheEntry entry; | 245 | ShaderDiskCacheEntry entry; |
| 245 | entry.type = shader_type; | 246 | entry.type = shader_type; |
| @@ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 247 | entry.code_b = std::move(code_b); | 248 | entry.code_b = std::move(code_b); |
| 248 | entry.unique_identifier = params.unique_identifier; | 249 | entry.unique_identifier = params.unique_identifier; |
| 249 | entry.bound_buffer = registry->GetBoundBuffer(); | 250 | entry.bound_buffer = registry->GetBoundBuffer(); |
| 251 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 250 | entry.keys = registry->GetKeys(); | 252 | entry.keys = registry->GetKeys(); |
| 251 | entry.bound_samplers = registry->GetBoundSamplers(); | 253 | entry.bound_samplers = registry->GetBoundSamplers(); |
| 252 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| @@ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 260 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 261 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 262 | 264 | ||
| 263 | auto registry = | 265 | auto& engine = params.system.GPU().KeplerCompute(); |
| 264 | std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute()); | 266 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |
| 265 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | 267 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 266 | auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); | 268 | const u64 uid = params.unique_identifier; |
| 269 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); | ||
| 267 | 270 | ||
| 268 | ShaderDiskCacheEntry entry; | 271 | ShaderDiskCacheEntry entry; |
| 269 | entry.type = ShaderType::Compute; | 272 | entry.type = ShaderType::Compute; |
| 270 | entry.code = std::move(code); | 273 | entry.code = std::move(code); |
| 271 | entry.unique_identifier = params.unique_identifier; | 274 | entry.unique_identifier = uid; |
| 272 | entry.bound_buffer = registry->GetBoundBuffer(); | 275 | entry.bound_buffer = registry->GetBoundBuffer(); |
| 276 | entry.compute_info = registry->GetComputeInfo(); | ||
| 273 | entry.keys = registry->GetKeys(); | 277 | entry.keys = registry->GetKeys(); |
| 274 | entry.bound_samplers = registry->GetBoundSamplers(); | 278 | entry.bound_samplers = registry->GetBoundSamplers(); |
| 275 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| @@ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 331 | return; | 335 | return; |
| 332 | } | 336 | } |
| 333 | const auto& entry = (*transferable)[i]; | 337 | const auto& entry = (*transferable)[i]; |
| 334 | const u64 unique_identifier = entry.unique_identifier; | 338 | const u64 uid = entry.unique_identifier; |
| 335 | const auto it = find_precompiled(unique_identifier); | 339 | const auto it = find_precompiled(uid); |
| 336 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | 340 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; |
| 337 | 341 | ||
| 338 | const bool is_compute = entry.type == ShaderType::Compute; | 342 | const bool is_compute = entry.type == ShaderType::Compute; |
| @@ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 350 | } | 354 | } |
| 351 | if (!program) { | 355 | if (!program) { |
| 352 | // Otherwise compile it from GLSL | 356 | // Otherwise compile it from GLSL |
| 353 | program = BuildShader(device, entry.type, unique_identifier, ir, true); | 357 | program = BuildShader(device, entry.type, uid, ir, *registry, true); |
| 354 | } | 358 | } |
| 355 | 359 | ||
| 356 | PrecompiledShader shader; | 360 | PrecompiledShader shader; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 308e57aae..48a25f1f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode; | |||
| 36 | using Tegra::Shader::IpaMode; | 36 | using Tegra::Shader::IpaMode; |
| 37 | using Tegra::Shader::IpaSampleMode; | 37 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::Register; | 38 | using Tegra::Shader::Register; |
| 39 | using VideoCommon::Shader::Registry; | ||
| 39 | 40 | ||
| 40 | using namespace std::string_literals; | 41 | using namespace std::string_literals; |
| 41 | using namespace VideoCommon::Shader; | 42 | using namespace VideoCommon::Shader; |
| @@ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | |||
| 288 | } | 289 | } |
| 289 | } | 290 | } |
| 290 | 291 | ||
| 292 | /// Describes primitive behavior on geometry shaders | ||
| 293 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 294 | switch (topology) { | ||
| 295 | case Maxwell::PrimitiveTopology::Points: | ||
| 296 | return {"points", 1}; | ||
| 297 | case Maxwell::PrimitiveTopology::Lines: | ||
| 298 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 299 | return {"lines", 2}; | ||
| 300 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 301 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 302 | return {"lines_adjacency", 4}; | ||
| 303 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 304 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 305 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 306 | return {"triangles", 3}; | ||
| 307 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 308 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 309 | return {"triangles_adjacency", 6}; | ||
| 310 | default: | ||
| 311 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 312 | return {"points", 1}; | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 291 | /// Generates code to use for a swizzle operation. | 316 | /// Generates code to use for a swizzle operation. |
| 292 | constexpr const char* GetSwizzle(std::size_t element) { | 317 | constexpr const char* GetSwizzle(std::size_t element) { |
| 293 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | 318 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| @@ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 367 | 392 | ||
| 368 | class GLSLDecompiler final { | 393 | class GLSLDecompiler final { |
| 369 | public: | 394 | public: |
| 370 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 395 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 371 | std::string_view suffix) | 396 | ShaderType stage, std::string_view suffix) |
| 372 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 397 | : device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix}, |
| 398 | header{ir.GetHeader()} {} | ||
| 373 | 399 | ||
| 374 | void Decompile() { | 400 | void Decompile() { |
| 375 | DeclareHeader(); | 401 | DeclareHeader(); |
| 376 | DeclareVertex(); | 402 | DeclareVertex(); |
| 377 | DeclareGeometry(); | 403 | DeclareGeometry(); |
| 378 | DeclareFragment(); | 404 | DeclareFragment(); |
| 405 | DeclareCompute(); | ||
| 379 | DeclareRegisters(); | 406 | DeclareRegisters(); |
| 380 | DeclareCustomVariables(); | 407 | DeclareCustomVariables(); |
| 381 | DeclarePredicates(); | 408 | DeclarePredicates(); |
| @@ -489,9 +516,15 @@ private: | |||
| 489 | return; | 516 | return; |
| 490 | } | 517 | } |
| 491 | 518 | ||
| 519 | const auto& info = registry.GetGraphicsInfo(); | ||
| 520 | const auto input_topology = info.primitive_topology; | ||
| 521 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 522 | max_input_vertices = max_vertices; | ||
| 523 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 524 | |||
| 492 | const auto topology = GetTopologyName(header.common3.output_topology); | 525 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 493 | const auto max_vertices = header.common4.max_output_vertices.Value(); | 526 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); |
| 494 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); | 527 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); |
| 495 | code.AddNewLine(); | 528 | code.AddNewLine(); |
| 496 | 529 | ||
| 497 | code.AddLine("in gl_PerVertex {{"); | 530 | code.AddLine("in gl_PerVertex {{"); |
| @@ -513,7 +546,8 @@ private: | |||
| 513 | if (!IsRenderTargetEnabled(render_target)) { | 546 | if (!IsRenderTargetEnabled(render_target)) { |
| 514 | continue; | 547 | continue; |
| 515 | } | 548 | } |
| 516 | code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); | 549 | code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, |
| 550 | render_target); | ||
| 517 | any = true; | 551 | any = true; |
| 518 | } | 552 | } |
| 519 | if (any) { | 553 | if (any) { |
| @@ -521,6 +555,20 @@ private: | |||
| 521 | } | 555 | } |
| 522 | } | 556 | } |
| 523 | 557 | ||
| 558 | void DeclareCompute() { | ||
| 559 | if (stage != ShaderType::Compute) { | ||
| 560 | return; | ||
| 561 | } | ||
| 562 | const auto& info = registry.GetComputeInfo(); | ||
| 563 | if (const u32 size = info.shared_memory_size_in_words; size > 0) { | ||
| 564 | code.AddLine("shared uint smem[];", size); | ||
| 565 | code.AddNewLine(); | ||
| 566 | } | ||
| 567 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 568 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 569 | code.AddNewLine(); | ||
| 570 | } | ||
| 571 | |||
| 524 | void DeclareVertexRedeclarations() { | 572 | void DeclareVertexRedeclarations() { |
| 525 | code.AddLine("out gl_PerVertex {{"); | 573 | code.AddLine("out gl_PerVertex {{"); |
| 526 | ++code.scope; | 574 | ++code.scope; |
| @@ -596,18 +644,16 @@ private: | |||
| 596 | } | 644 | } |
| 597 | 645 | ||
| 598 | void DeclareLocalMemory() { | 646 | void DeclareLocalMemory() { |
| 647 | u64 local_memory_size = 0; | ||
| 599 | if (stage == ShaderType::Compute) { | 648 | if (stage == ShaderType::Compute) { |
| 600 | code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); | 649 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; |
| 601 | code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); | 650 | } else { |
| 602 | code.AddLine("#endif"); | 651 | local_memory_size = header.GetLocalMemorySize(); |
| 603 | return; | ||
| 604 | } | 652 | } |
| 605 | |||
| 606 | const u64 local_memory_size = header.GetLocalMemorySize(); | ||
| 607 | if (local_memory_size == 0) { | 653 | if (local_memory_size == 0) { |
| 608 | return; | 654 | return; |
| 609 | } | 655 | } |
| 610 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 656 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 611 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | 657 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); |
| 612 | code.AddNewLine(); | 658 | code.AddNewLine(); |
| 613 | } | 659 | } |
| @@ -996,7 +1042,8 @@ private: | |||
| 996 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 1042 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 997 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 1043 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 998 | // this happens and what's its intent. | 1044 | // this happens and what's its intent. |
| 999 | return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); | 1045 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), |
| 1046 | max_input_vertices.value()); | ||
| 1000 | } | 1047 | } |
| 1001 | return std::string(name); | 1048 | return std::string(name); |
| 1002 | }; | 1049 | }; |
| @@ -2428,11 +2475,14 @@ private: | |||
| 2428 | 2475 | ||
| 2429 | const Device& device; | 2476 | const Device& device; |
| 2430 | const ShaderIR& ir; | 2477 | const ShaderIR& ir; |
| 2478 | const Registry& registry; | ||
| 2431 | const ShaderType stage; | 2479 | const ShaderType stage; |
| 2432 | const std::string_view suffix; | 2480 | const std::string_view suffix; |
| 2433 | const Header header; | 2481 | const Header header; |
| 2434 | 2482 | ||
| 2435 | ShaderWriter code; | 2483 | ShaderWriter code; |
| 2484 | |||
| 2485 | std::optional<u32> max_input_vertices; | ||
| 2436 | }; | 2486 | }; |
| 2437 | 2487 | ||
| 2438 | std::string GetFlowVariable(u32 index) { | 2488 | std::string GetFlowVariable(u32 index) { |
| @@ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2647 | return entries; | 2697 | return entries; |
| 2648 | } | 2698 | } |
| 2649 | 2699 | ||
| 2650 | std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, | 2700 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 2651 | std::string_view suffix) { | 2701 | ShaderType stage, std::string_view suffix) { |
| 2652 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2702 | GLSLDecompiler decompiler(device, ir, registry, stage, suffix); |
| 2653 | decompiler.Decompile(); | 2703 | decompiler.Decompile(); |
| 2654 | return decompiler.GetResult(); | 2704 | return decompiler.GetResult(); |
| 2655 | } | 2705 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index ae97ab504..68b68ee77 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,12 +12,9 @@ | |||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/engines/shader_type.h" | 14 | #include "video_core/engines/shader_type.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | class ShaderIR; | ||
| 19 | } | ||
| 20 | |||
| 21 | namespace OpenGL { | 18 | namespace OpenGL { |
| 22 | 19 | ||
| 23 | class Device; | 20 | class Device; |
| @@ -80,6 +77,7 @@ struct ShaderEntries { | |||
| 80 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); | 77 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 81 | 78 | ||
| 82 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 79 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 80 | const VideoCommon::Shader::Registry& registry, | ||
| 83 | Tegra::Engines::ShaderType stage, std::string_view suffix = {}); | 81 | Tegra::Engines::ShaderType stage, std::string_view suffix = {}); |
| 84 | 82 | ||
| 85 | } // namespace OpenGL | 83 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 0e1717c5e..5d5118058 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -48,7 +48,7 @@ struct BindlessSamplerKey { | |||
| 48 | Tegra::Engines::SamplerDescriptor sampler; | 48 | Tegra::Engines::SamplerDescriptor sampler; |
| 49 | }; | 49 | }; |
| 50 | 50 | ||
| 51 | constexpr u32 NativeVersion = 16; | 51 | constexpr u32 NativeVersion = 17; |
| 52 | 52 | ||
| 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 54 | ShaderCacheVersionHash hash{}; | 54 | ShaderCacheVersionHash hash{}; |
| @@ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 83 | return false; | 83 | return false; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | bool is_texture_handler_size_known; | 86 | u8 is_texture_handler_size_known; |
| 87 | u32 texture_handler_size_value; | 87 | u32 texture_handler_size_value; |
| 88 | u32 num_keys; | 88 | u32 num_keys; |
| 89 | u32 num_bound_samplers; | 89 | u32 num_bound_samplers; |
| 90 | u32 num_bindless_samplers; | 90 | u32 num_bindless_samplers; |
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || |
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || |
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || |
| 94 | file.ReadArray(&num_bound_samplers, 1) != 1 || | 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || |
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 95 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { |
| 96 | return false; | 97 | return false; |
| 97 | } | 98 | } |
| @@ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 136 | } | 137 | } |
| 137 | 138 | ||
| 138 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | 139 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || |
| 139 | file.WriteObject(texture_handler_size.has_value()) != 1 || | 140 | file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || |
| 140 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || | 141 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || |
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | ||
| 141 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || |
| 142 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || |
| 143 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 7f2ab36be..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -51,8 +51,10 @@ struct ShaderDiskCacheEntry { | |||
| 51 | ProgramCode code_b; | 51 | ProgramCode code_b; |
| 52 | 52 | ||
| 53 | u64 unique_identifier = 0; | 53 | u64 unique_identifier = 0; |
| 54 | u32 bound_buffer = 0; | ||
| 55 | std::optional<u32> texture_handler_size; | 54 | std::optional<u32> texture_handler_size; |
| 55 | u32 bound_buffer = 0; | ||
| 56 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 56 | VideoCommon::Shader::KeyMap keys; | 58 | VideoCommon::Shader::KeyMap keys; |
| 57 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; |
| 58 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index 7126caf98..dc2d3dce3 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp | |||
| @@ -6,21 +6,55 @@ | |||
| 6 | #include <tuple> | 6 | #include <tuple> |
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/engines/kepler_compute.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/engines/shader_type.h" | 11 | #include "video_core/engines/shader_type.h" |
| 11 | #include "video_core/shader/registry.h" | 12 | #include "video_core/shader/registry.h" |
| 12 | 13 | ||
| 13 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| 14 | 15 | ||
| 16 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 15 | using Tegra::Engines::SamplerDescriptor; | 17 | using Tegra::Engines::SamplerDescriptor; |
| 18 | using Tegra::Engines::ShaderType; | ||
| 16 | 19 | ||
| 17 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | 20 | namespace { |
| 18 | VideoCore::GuestDriverProfile stored_guest_driver_profile) | 21 | |
| 19 | : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} | 22 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { |
| 23 | if (shader_stage == ShaderType::Compute) { | ||
| 24 | return {}; | ||
| 25 | } | ||
| 26 | auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 27 | |||
| 28 | GraphicsInfo info; | ||
| 29 | info.primitive_topology = graphics.regs.draw.topology; | ||
| 30 | return info; | ||
| 31 | } | ||
| 32 | |||
| 33 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 34 | if (shader_stage != ShaderType::Compute) { | ||
| 35 | return {}; | ||
| 36 | } | ||
| 37 | auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 38 | const auto& launch = compute.launch_description; | ||
| 39 | |||
| 40 | ComputeInfo info; | ||
| 41 | info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||
| 42 | info.local_memory_size_in_words = launch.local_pos_alloc; | ||
| 43 | info.shared_memory_size_in_words = launch.shared_alloc; | ||
| 44 | return info; | ||
| 45 | } | ||
| 46 | |||
| 47 | } // Anonymous namespace | ||
| 48 | |||
| 49 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 50 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 51 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 20 | 52 | ||
| 21 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | 53 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, |
| 22 | Tegra::Engines::ConstBufferEngineInterface& engine) | 54 | Tegra::Engines::ConstBufferEngineInterface& engine) |
| 23 | : stage{shader_stage}, engine{&engine} {} | 55 | : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, |
| 56 | graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||
| 57 | shader_stage, engine)} {} | ||
| 24 | 58 | ||
| 25 | Registry::~Registry() = default; | 59 | Registry::~Registry() = default; |
| 26 | 60 | ||
| @@ -67,18 +101,6 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler | |||
| 67 | return value; | 101 | return value; |
| 68 | } | 102 | } |
| 69 | 103 | ||
| 70 | std::optional<u32> Registry::ObtainBoundBuffer() { | ||
| 71 | if (bound_buffer_saved) { | ||
| 72 | return bound_buffer; | ||
| 73 | } | ||
| 74 | if (!engine) { | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | bound_buffer_saved = true; | ||
| 78 | bound_buffer = engine->GetBoundBuffer(); | ||
| 79 | return bound_buffer; | ||
| 80 | } | ||
| 81 | |||
| 82 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | 104 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { |
| 83 | keys.insert_or_assign({buffer, offset}, value); | 105 | keys.insert_or_assign({buffer, offset}, value); |
| 84 | } | 106 | } |
| @@ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s | |||
| 91 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | 113 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); |
| 92 | } | 114 | } |
| 93 | 115 | ||
| 94 | void Registry::SetBoundBuffer(u32 buffer) { | ||
| 95 | bound_buffer_saved = true; | ||
| 96 | bound_buffer = buffer; | ||
| 97 | } | ||
| 98 | |||
| 99 | bool Registry::IsConsistent() const { | 116 | bool Registry::IsConsistent() const { |
| 100 | if (!engine) { | 117 | if (!engine) { |
| 101 | return true; | 118 | return true; |
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index a5487e1d7..c1a04ea02 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h | |||
| @@ -4,11 +4,16 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 9 | #include <type_traits> | ||
| 8 | #include <unordered_map> | 10 | #include <unordered_map> |
| 11 | #include <utility> | ||
| 12 | |||
| 9 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 10 | #include "common/hash.h" | 14 | #include "common/hash.h" |
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | 15 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 13 | #include "video_core/guest_driver.h" | 18 | #include "video_core/guest_driver.h" |
| 14 | 19 | ||
| @@ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescripto | |||
| 19 | using BindlessSamplerMap = | 24 | using BindlessSamplerMap = |
| 20 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; |
| 21 | 26 | ||
| 27 | struct GraphicsInfo { | ||
| 28 | Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; | ||
| 29 | }; | ||
| 30 | static_assert(std::is_trivially_copyable_v<GraphicsInfo>); | ||
| 31 | |||
| 32 | struct ComputeInfo { | ||
| 33 | std::array<u32, 3> workgroup_size{}; | ||
| 34 | u32 shared_memory_size_in_words = 0; | ||
| 35 | u32 local_memory_size_in_words = 0; | ||
| 36 | }; | ||
| 37 | static_assert(std::is_trivially_copyable_v<ComputeInfo>); | ||
| 38 | |||
| 39 | struct SerializedRegistryInfo { | ||
| 40 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 41 | u32 bound_buffer = 0; | ||
| 42 | GraphicsInfo graphics; | ||
| 43 | ComputeInfo compute; | ||
| 44 | }; | ||
| 45 | |||
| 22 | /** | 46 | /** |
| 23 | * The Registry is a class use to interface the 3D and compute engines with the shader compiler. | 47 | * The Registry is a class use to interface the 3D and compute engines with the shader compiler. |
| 24 | * With it, the shader can obtain required data from GPU state and store it for disk shader | 48 | * With it, the shader can obtain required data from GPU state and store it for disk shader |
| @@ -26,8 +50,7 @@ using BindlessSamplerMap = | |||
| 26 | */ | 50 | */ |
| 27 | class Registry { | 51 | class Registry { |
| 28 | public: | 52 | public: |
| 29 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | 53 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); |
| 30 | VideoCore::GuestDriverProfile stored_guest_driver_profile); | ||
| 31 | 54 | ||
| 32 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | 55 | explicit Registry(Tegra::Engines::ShaderType shader_stage, |
| 33 | Tegra::Engines::ConstBufferEngineInterface& engine); | 56 | Tegra::Engines::ConstBufferEngineInterface& engine); |
| @@ -42,8 +65,6 @@ public: | |||
| 42 | 65 | ||
| 43 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | 66 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |
| 44 | 67 | ||
| 45 | std::optional<u32> ObtainBoundBuffer(); | ||
| 46 | |||
| 47 | /// Inserts a key. | 68 | /// Inserts a key. |
| 48 | void InsertKey(u32 buffer, u32 offset, u32 value); | 69 | void InsertKey(u32 buffer, u32 offset, u32 value); |
| 49 | 70 | ||
| @@ -53,9 +74,6 @@ public: | |||
| 53 | /// Inserts a bindless sampler key. | 74 | /// Inserts a bindless sampler key. |
| 54 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | 75 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |
| 55 | 76 | ||
| 56 | /// Set the bound buffer for this registry. | ||
| 57 | void SetBoundBuffer(u32 buffer); | ||
| 58 | |||
| 59 | /// Checks keys and samplers against engine's current const buffers. | 77 | /// Checks keys and samplers against engine's current const buffers. |
| 60 | /// Returns true if they are the same value, false otherwise. | 78 | /// Returns true if they are the same value, false otherwise. |
| 61 | bool IsConsistent() const; | 79 | bool IsConsistent() const; |
| @@ -83,6 +101,18 @@ public: | |||
| 83 | return bound_buffer; | 101 | return bound_buffer; |
| 84 | } | 102 | } |
| 85 | 103 | ||
| 104 | /// Returns compute information from this shader | ||
| 105 | const GraphicsInfo& GetGraphicsInfo() const { | ||
| 106 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 107 | return graphics_info; | ||
| 108 | } | ||
| 109 | |||
| 110 | /// Returns compute information from this shader | ||
| 111 | const ComputeInfo& GetComputeInfo() const { | ||
| 112 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 113 | return compute_info; | ||
| 114 | } | ||
| 115 | |||
| 86 | /// Obtains access to the guest driver's profile. | 116 | /// Obtains access to the guest driver's profile. |
| 87 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | 117 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { |
| 88 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | 118 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; |
| @@ -95,8 +125,9 @@ private: | |||
| 95 | KeyMap keys; | 125 | KeyMap keys; |
| 96 | BoundSamplerMap bound_samplers; | 126 | BoundSamplerMap bound_samplers; |
| 97 | BindlessSamplerMap bindless_samplers; | 127 | BindlessSamplerMap bindless_samplers; |
| 98 | bool bound_buffer_saved{}; | 128 | u32 bound_buffer; |
| 99 | u32 bound_buffer{}; | 129 | GraphicsInfo graphics_info; |
| 130 | ComputeInfo compute_info; | ||
| 100 | }; | 131 | }; |
| 101 | 132 | ||
| 102 | } // namespace VideoCommon::Shader | 133 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 831219841..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -81,14 +81,11 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |
| 82 | return {tracked, track}; | 82 | return {tracked, track}; |
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 84 | const auto bound_buffer = registry.ObtainBoundBuffer(); | 84 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 85 | if (!bound_buffer) { | 85 | if (bound_buffer != cbuf->GetIndex()) { |
| 86 | return {}; | 86 | return {}; |
| 87 | } | 87 | } |
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | 88 | const auto pair = DecoupleIndirectRead(*operation); |
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | 89 | if (!pair) { |
| 93 | return {}; | 90 | return {}; |
| 94 | } | 91 | } |