diff options
| author | 2020-02-26 16:13:47 -0300 | |
|---|---|---|
| committer | 2020-03-09 18:40:06 -0300 | |
| commit | bd8b9bbcee93549f323352f227ff44d0e79e0ad4 (patch) | |
| tree | 5c68e7ab171db3f3d31bf27cd573c54a918f0708 /src | |
| parent | Merge pull request #3301 from ReinUsesLisp/state-tracker (diff) | |
| download | yuzu-bd8b9bbcee93549f323352f227ff44d0e79e0ad4.tar.gz yuzu-bd8b9bbcee93549f323352f227ff44d0e79e0ad4.tar.xz yuzu-bd8b9bbcee93549f323352f227ff44d0e79e0ad4.zip | |
gl_shader_cache: Rework shader cache and remove post-specializations
Instead of pre-specializing shaders and then post-specializing them,
drop the latter and only "specialize" the shader while decoding it.
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/video_core/guest_driver.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/guest_driver.h | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 503 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 97 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 201 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 402 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 149 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 109 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 34 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 11 | ||||
| -rw-r--r-- | src/video_core/shader/decode.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 9 |
18 files changed, 544 insertions, 1094 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9afc6105d..274e4ec79 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |
| 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |
| 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |
| 41 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" | ||
| 42 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" | ||
| 43 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | 41 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |
| 44 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | 42 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |
| 45 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | 43 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 14f3b4569..3d93c07fb 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -65,8 +65,6 @@ add_library(video_core STATIC | |||
| 65 | renderer_opengl/gl_shader_decompiler.h | 65 | renderer_opengl/gl_shader_decompiler.h |
| 66 | renderer_opengl/gl_shader_disk_cache.cpp | 66 | renderer_opengl/gl_shader_disk_cache.cpp |
| 67 | renderer_opengl/gl_shader_disk_cache.h | 67 | renderer_opengl/gl_shader_disk_cache.h |
| 68 | renderer_opengl/gl_shader_gen.cpp | ||
| 69 | renderer_opengl/gl_shader_gen.h | ||
| 70 | renderer_opengl/gl_shader_manager.cpp | 68 | renderer_opengl/gl_shader_manager.cpp |
| 71 | renderer_opengl/gl_shader_manager.h | 69 | renderer_opengl/gl_shader_manager.h |
| 72 | renderer_opengl/gl_shader_util.cpp | 70 | renderer_opengl/gl_shader_util.cpp |
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index 6adef459e..f058f2744 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp | |||
| @@ -4,13 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <vector> | ||
| 7 | 8 | ||
| 9 | #include "common/common_types.h" | ||
| 8 | #include "video_core/guest_driver.h" | 10 | #include "video_core/guest_driver.h" |
| 9 | 11 | ||
| 10 | namespace VideoCore { | 12 | namespace VideoCore { |
| 11 | 13 | ||
| 12 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { | 14 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { |
| 13 | if (texture_handler_size_deduced) { | 15 | if (texture_handler_size) { |
| 14 | return; | 16 | return; |
| 15 | } | 17 | } |
| 16 | const std::size_t size = bound_offsets.size(); | 18 | const std::size_t size = bound_offsets.size(); |
| @@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse | |||
| 29 | if (min_val > 2) { | 31 | if (min_val > 2) { |
| 30 | return; | 32 | return; |
| 31 | } | 33 | } |
| 32 | texture_handler_size_deduced = true; | ||
| 33 | texture_handler_size = min_texture_handler_size * min_val; | 34 | texture_handler_size = min_texture_handler_size * min_val; |
| 34 | } | 35 | } |
| 35 | 36 | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index fc1917347..99450777e 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -17,25 +18,29 @@ namespace VideoCore { | |||
| 17 | */ | 18 | */ |
| 18 | class GuestDriverProfile { | 19 | class GuestDriverProfile { |
| 19 | public: | 20 | public: |
| 20 | void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | 21 | explicit GuestDriverProfile() = default; |
| 22 | explicit GuestDriverProfile(std::optional<u32> texture_handler_size) | ||
| 23 | : texture_handler_size{texture_handler_size} {} | ||
| 24 | |||
| 25 | void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); | ||
| 21 | 26 | ||
| 22 | u32 GetTextureHandlerSize() const { | 27 | u32 GetTextureHandlerSize() const { |
| 23 | return texture_handler_size; | 28 | return texture_handler_size.value_or(default_texture_handler_size); |
| 24 | } | 29 | } |
| 25 | 30 | ||
| 26 | bool TextureHandlerSizeKnown() const { | 31 | bool IsTextureHandlerSizeKnown() const { |
| 27 | return texture_handler_size_deduced; | 32 | return texture_handler_size.has_value(); |
| 28 | } | 33 | } |
| 29 | 34 | ||
| 30 | private: | 35 | private: |
| 31 | // Minimum size of texture handler any driver can use. | 36 | // Minimum size of texture handler any driver can use. |
| 32 | static constexpr u32 min_texture_handler_size = 4; | 37 | static constexpr u32 min_texture_handler_size = 4; |
| 33 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily | 38 | |
| 34 | // use 4 bytes instead. Thus, certain drivers may squish the size. | 39 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. |
| 40 | // Thus, certain drivers may squish the size. | ||
| 35 | static constexpr u32 default_texture_handler_size = 8; | 41 | static constexpr u32 default_texture_handler_size = 8; |
| 36 | 42 | ||
| 37 | u32 texture_handler_size = default_texture_handler_size; | 43 | std::optional<u32> texture_handler_size = default_texture_handler_size; |
| 38 | bool texture_handler_size_deduced = false; | ||
| 39 | }; | 44 | }; |
| 40 | 45 | ||
| 41 | } // namespace VideoCore | 46 | } // namespace VideoCore |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 55324e6d5..385a31ef6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 32 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 33 | #include "video_core/renderer_opengl/renderer_opengl.h" | 32 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 34 | 33 | ||
| @@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 76 | } | 75 | } |
| 77 | 76 | ||
| 78 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 77 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
| 79 | const GLShader::ConstBufferEntry& entry) { | 78 | const ConstBufferEntry& entry) { |
| 80 | if (!entry.IsIndirect()) { | 79 | if (!entry.IsIndirect()) { |
| 81 | return entry.GetSize(); | 80 | return entry.GetSize(); |
| 82 | } | 81 | } |
| @@ -272,9 +271,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 272 | SetupDrawTextures(stage, shader); | 271 | SetupDrawTextures(stage, shader); |
| 273 | SetupDrawImages(stage, shader); | 272 | SetupDrawImages(stage, shader); |
| 274 | 273 | ||
| 275 | const ProgramVariant variant(primitive_mode); | 274 | const GLuint program_handle = shader->GetHandle(); |
| 276 | const auto program_handle = shader->GetHandle(variant); | ||
| 277 | |||
| 278 | switch (program) { | 275 | switch (program) { |
| 279 | case Maxwell::ShaderProgram::VertexA: | 276 | case Maxwell::ShaderProgram::VertexA: |
| 280 | case Maxwell::ShaderProgram::VertexB: | 277 | case Maxwell::ShaderProgram::VertexB: |
| @@ -295,7 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 295 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 292 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 296 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 293 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| 297 | // clip distances only when it's written by a shader stage. | 294 | // clip distances only when it's written by a shader stage. |
| 298 | clip_distances |= shader->GetShaderEntries().clip_distances; | 295 | clip_distances |= shader->GetEntries().clip_distances; |
| 299 | 296 | ||
| 300 | // When VertexA is enabled, we have dual vertex shaders | 297 | // When VertexA is enabled, we have dual vertex shaders |
| 301 | if (program == Maxwell::ShaderProgram::VertexA) { | 298 | if (program == Maxwell::ShaderProgram::VertexA) { |
| @@ -622,13 +619,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 622 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 619 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 623 | SetupComputeTextures(kernel); | 620 | SetupComputeTextures(kernel); |
| 624 | SetupComputeImages(kernel); | 621 | SetupComputeImages(kernel); |
| 625 | 622 | glUseProgramStages(program_manager.GetHandle(), GL_COMPUTE_SHADER_BIT, kernel->GetHandle()); | |
| 626 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 627 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, | ||
| 628 | launch_desc.block_dim_z, launch_desc.shared_alloc, | ||
| 629 | launch_desc.local_pos_alloc); | ||
| 630 | glUseProgramStages(program_manager.GetHandle(), GL_COMPUTE_SHADER_BIT, | ||
| 631 | kernel->GetHandle(variant)); | ||
| 632 | 623 | ||
| 633 | const std::size_t buffer_size = | 624 | const std::size_t buffer_size = |
| 634 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 625 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -646,6 +637,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 646 | bind_ubo_pushbuffer.Bind(); | 637 | bind_ubo_pushbuffer.Bind(); |
| 647 | bind_ssbo_pushbuffer.Bind(); | 638 | bind_ssbo_pushbuffer.Bind(); |
| 648 | 639 | ||
| 640 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 649 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 641 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 650 | ++num_queued_commands; | 642 | ++num_queued_commands; |
| 651 | } | 643 | } |
| @@ -750,7 +742,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad | |||
| 750 | const auto& shader_stage = stages[stage_index]; | 742 | const auto& shader_stage = stages[stage_index]; |
| 751 | 743 | ||
| 752 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 744 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; |
| 753 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 745 | for (const auto& entry : shader->GetEntries().const_buffers) { |
| 754 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 746 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 755 | SetupConstBuffer(binding++, buffer, entry); | 747 | SetupConstBuffer(binding++, buffer, entry); |
| 756 | } | 748 | } |
| @@ -761,7 +753,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 761 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 753 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 762 | 754 | ||
| 763 | u32 binding = 0; | 755 | u32 binding = 0; |
| 764 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | 756 | for (const auto& entry : kernel->GetEntries().const_buffers) { |
| 765 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 757 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 766 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 758 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 767 | Tegra::Engines::ConstBufferInfo buffer; | 759 | Tegra::Engines::ConstBufferInfo buffer; |
| @@ -773,7 +765,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 773 | } | 765 | } |
| 774 | 766 | ||
| 775 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 767 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 776 | const GLShader::ConstBufferEntry& entry) { | 768 | const ConstBufferEntry& entry) { |
| 777 | if (!buffer.enabled) { | 769 | if (!buffer.enabled) { |
| 778 | // Set values to zero to unbind buffers | 770 | // Set values to zero to unbind buffers |
| 779 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 771 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, |
| @@ -797,7 +789,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 797 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 789 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 798 | 790 | ||
| 799 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 791 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; |
| 800 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 792 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 801 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 793 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 802 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 794 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 803 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 795 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -811,7 +803,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 811 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 803 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| 812 | 804 | ||
| 813 | u32 binding = 0; | 805 | u32 binding = 0; |
| 814 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | 806 | for (const auto& entry : kernel->GetEntries().global_memory_entries) { |
| 815 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | 807 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; |
| 816 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 808 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 817 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 809 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -819,7 +811,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 819 | } | 811 | } |
| 820 | } | 812 | } |
| 821 | 813 | ||
| 822 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, | 814 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 823 | GPUVAddr gpu_addr, std::size_t size) { | 815 | GPUVAddr gpu_addr, std::size_t size) { |
| 824 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 816 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 825 | const auto [ssbo, buffer_offset] = | 817 | const auto [ssbo, buffer_offset] = |
| @@ -831,7 +823,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 831 | MICROPROFILE_SCOPE(OpenGL_Texture); | 823 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 832 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 824 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 833 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 825 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| 834 | for (const auto& entry : shader->GetShaderEntries().samplers) { | 826 | for (const auto& entry : shader->GetEntries().samplers) { |
| 835 | const auto shader_type = static_cast<ShaderType>(stage_index); | 827 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 836 | for (std::size_t i = 0; i < entry.Size(); ++i) { | 828 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 837 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | 829 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); |
| @@ -844,7 +836,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 844 | MICROPROFILE_SCOPE(OpenGL_Texture); | 836 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 845 | const auto& compute = system.GPU().KeplerCompute(); | 837 | const auto& compute = system.GPU().KeplerCompute(); |
| 846 | u32 binding = 0; | 838 | u32 binding = 0; |
| 847 | for (const auto& entry : kernel->GetShaderEntries().samplers) { | 839 | for (const auto& entry : kernel->GetEntries().samplers) { |
| 848 | for (std::size_t i = 0; i < entry.Size(); ++i) { | 840 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 849 | const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); | 841 | const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); |
| 850 | SetupTexture(binding++, texture, entry); | 842 | SetupTexture(binding++, texture, entry); |
| @@ -853,7 +845,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 853 | } | 845 | } |
| 854 | 846 | ||
| 855 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 847 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 856 | const GLShader::SamplerEntry& entry) { | 848 | const SamplerEntry& entry) { |
| 857 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 849 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); |
| 858 | if (!view) { | 850 | if (!view) { |
| 859 | // Can occur when texture addr is null or its memory is unmapped/invalid | 851 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| @@ -876,7 +868,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 876 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 868 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { |
| 877 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 869 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 878 | u32 binding = device.GetBaseBindings(stage_index).image; | 870 | u32 binding = device.GetBaseBindings(stage_index).image; |
| 879 | for (const auto& entry : shader->GetShaderEntries().images) { | 871 | for (const auto& entry : shader->GetEntries().images) { |
| 880 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 872 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 881 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 873 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; |
| 882 | SetupImage(binding++, tic, entry); | 874 | SetupImage(binding++, tic, entry); |
| @@ -886,14 +878,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 886 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | 878 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { |
| 887 | const auto& compute = system.GPU().KeplerCompute(); | 879 | const auto& compute = system.GPU().KeplerCompute(); |
| 888 | u32 binding = 0; | 880 | u32 binding = 0; |
| 889 | for (const auto& entry : shader->GetShaderEntries().images) { | 881 | for (const auto& entry : shader->GetEntries().images) { |
| 890 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; | 882 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; |
| 891 | SetupImage(binding++, tic, entry); | 883 | SetupImage(binding++, tic, entry); |
| 892 | } | 884 | } |
| 893 | } | 885 | } |
| 894 | 886 | ||
| 895 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 887 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, |
| 896 | const GLShader::ImageEntry& entry) { | 888 | const ImageEntry& entry) { |
| 897 | const auto view = texture_cache.GetImageSurface(tic, entry); | 889 | const auto view = texture_cache.GetImageSurface(tic, entry); |
| 898 | if (!view) { | 890 | if (!view) { |
| 899 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | 891 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b24c6661b..e83c5ebdc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -98,7 +98,7 @@ private: | |||
| 98 | 98 | ||
| 99 | /// Configures a constant buffer. | 99 | /// Configures a constant buffer. |
| 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 101 | const GLShader::ConstBufferEntry& entry); | 101 | const ConstBufferEntry& entry); |
| 102 | 102 | ||
| 103 | /// Configures the current global memory entries to use for the draw command. | 103 | /// Configures the current global memory entries to use for the draw command. |
| 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); |
| @@ -107,7 +107,7 @@ private: | |||
| 107 | void SetupComputeGlobalMemory(const Shader& kernel); | 107 | void SetupComputeGlobalMemory(const Shader& kernel); |
| 108 | 108 | ||
| 109 | /// Configures a constant buffer. | 109 | /// Configures a constant buffer. |
| 110 | void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 110 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 111 | std::size_t size); | 111 | std::size_t size); |
| 112 | 112 | ||
| 113 | /// Configures the current textures to use for the draw command. | 113 | /// Configures the current textures to use for the draw command. |
| @@ -118,7 +118,7 @@ private: | |||
| 118 | 118 | ||
| 119 | /// Configures a texture. | 119 | /// Configures a texture. |
| 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 121 | const GLShader::SamplerEntry& entry); | 121 | const SamplerEntry& entry); |
| 122 | 122 | ||
| 123 | /// Configures images in a graphics shader. | 123 | /// Configures images in a graphics shader. |
| 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); |
| @@ -127,8 +127,7 @@ private: | |||
| 127 | void SetupComputeImages(const Shader& shader); | 127 | void SetupComputeImages(const Shader& shader); |
| 128 | 128 | ||
| 129 | /// Configures an image. | 129 | /// Configures an image. |
| 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 131 | const GLShader::ImageEntry& entry); | ||
| 132 | 131 | ||
| 133 | /// Syncs the viewport and depth range to match the guest state | 132 | /// Syncs the viewport and depth range to match the guest state |
| 134 | void SyncViewport(); | 133 | void SyncViewport(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cb89db8c..e3a1d5a5f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -2,12 +2,16 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <functional> | ||
| 5 | #include <mutex> | 7 | #include <mutex> |
| 6 | #include <optional> | 8 | #include <optional> |
| 7 | #include <string> | 9 | #include <string> |
| 8 | #include <thread> | 10 | #include <thread> |
| 9 | #include <unordered_set> | 11 | #include <unordered_set> |
| 12 | |||
| 10 | #include <boost/functional/hash.hpp> | 13 | #include <boost/functional/hash.hpp> |
| 14 | |||
| 11 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 16 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| @@ -56,7 +60,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | |||
| 56 | } | 60 | } |
| 57 | 61 | ||
| 58 | /// Calculates the size of a program stream | 62 | /// Calculates the size of a program stream |
| 59 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 63 | std::size_t CalculateProgramSize(const ProgramCode& program) { |
| 60 | constexpr std::size_t start_offset = 10; | 64 | constexpr std::size_t start_offset = 10; |
| 61 | // This is the encoded version of BRA that jumps to itself. All Nvidia | 65 | // This is the encoded version of BRA that jumps to itself. All Nvidia |
| 62 | // shaders end with one. | 66 | // shaders end with one. |
| @@ -109,32 +113,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { | |||
| 109 | } | 113 | } |
| 110 | } | 114 | } |
| 111 | 115 | ||
| 112 | /// Describes primitive behavior on geometry shaders | ||
| 113 | constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||
| 114 | switch (primitive_mode) { | ||
| 115 | case GL_POINTS: | ||
| 116 | return {"points", 1}; | ||
| 117 | case GL_LINES: | ||
| 118 | case GL_LINE_STRIP: | ||
| 119 | return {"lines", 2}; | ||
| 120 | case GL_LINES_ADJACENCY: | ||
| 121 | case GL_LINE_STRIP_ADJACENCY: | ||
| 122 | return {"lines_adjacency", 4}; | ||
| 123 | case GL_TRIANGLES: | ||
| 124 | case GL_TRIANGLE_STRIP: | ||
| 125 | case GL_TRIANGLE_FAN: | ||
| 126 | return {"triangles", 3}; | ||
| 127 | case GL_TRIANGLES_ADJACENCY: | ||
| 128 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 129 | return {"triangles_adjacency", 6}; | ||
| 130 | default: | ||
| 131 | return {"points", 1}; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | /// Hashes one (or two) program streams | 116 | /// Hashes one (or two) program streams |
| 136 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, | 117 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, |
| 137 | const ProgramCode& code_b) { | 118 | const ProgramCode& code_b = {}) { |
| 138 | u64 unique_identifier = boost::hash_value(code); | 119 | u64 unique_identifier = boost::hash_value(code); |
| 139 | if (is_a) { | 120 | if (is_a) { |
| 140 | // VertexA programs include two programs | 121 | // VertexA programs include two programs |
| @@ -143,24 +124,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co | |||
| 143 | return unique_identifier; | 124 | return unique_identifier; |
| 144 | } | 125 | } |
| 145 | 126 | ||
| 146 | /// Creates an unspecialized program from code streams | ||
| 147 | std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, | ||
| 148 | const std::optional<ShaderIR>& ir_b) { | ||
| 149 | switch (shader_type) { | ||
| 150 | case ShaderType::Vertex: | ||
| 151 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | ||
| 152 | case ShaderType::Geometry: | ||
| 153 | return GLShader::GenerateGeometryShader(device, ir); | ||
| 154 | case ShaderType::Fragment: | ||
| 155 | return GLShader::GenerateFragmentShader(device, ir); | ||
| 156 | case ShaderType::Compute: | ||
| 157 | return GLShader::GenerateComputeShader(device, ir); | ||
| 158 | default: | ||
| 159 | UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); | ||
| 160 | return {}; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | 127 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { |
| 165 | switch (shader_type) { | 128 | switch (shader_type) { |
| 166 | case ShaderType::Vertex: | 129 | case ShaderType::Vertex: |
| @@ -196,102 +159,35 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 196 | return {}; | 159 | return {}; |
| 197 | } | 160 | } |
| 198 | 161 | ||
| 199 | std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { | 162 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 200 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 163 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 201 | } | 164 | } |
| 202 | 165 | ||
| 203 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, | 166 | std::shared_ptr<ConstBufferLocker> MakeLocker(const ShaderDiskCacheEntry& entry) { |
| 204 | ShaderType shader_type) { | 167 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |
| 205 | if (shader_type == ShaderType::Compute) { | 168 | auto locker = std::make_shared<ConstBufferLocker>(entry.type, guest_profile); |
| 206 | return system.GPU().KeplerCompute(); | 169 | locker->SetBoundBuffer(entry.bound_buffer); |
| 207 | } else { | 170 | for (const auto& [address, value] : entry.keys) { |
| 208 | return system.GPU().Maxwell3D(); | 171 | const auto [buffer, offset] = address; |
| 209 | } | 172 | locker->InsertKey(buffer, offset, value); |
| 210 | } | ||
| 211 | |||
| 212 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { | ||
| 213 | return std::make_unique<ConstBufferLocker>(shader_type, | ||
| 214 | GetConstBufferEngineInterface(system, shader_type)); | ||
| 215 | } | ||
| 216 | |||
| 217 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||
| 218 | locker.SetBoundBuffer(usage.bound_buffer); | ||
| 219 | for (const auto& key : usage.keys) { | ||
| 220 | const auto [buffer, offset] = key.first; | ||
| 221 | locker.InsertKey(buffer, offset, key.second); | ||
| 222 | } | 173 | } |
| 223 | for (const auto& [offset, sampler] : usage.bound_samplers) { | 174 | for (const auto& [offset, sampler] : entry.bound_samplers) { |
| 224 | locker.InsertBoundSampler(offset, sampler); | 175 | locker->InsertBoundSampler(offset, sampler); |
| 225 | } | 176 | } |
| 226 | for (const auto& [key, sampler] : usage.bindless_samplers) { | 177 | for (const auto& [key, sampler] : entry.bindless_samplers) { |
| 227 | const auto [buffer, offset] = key; | 178 | const auto [buffer, offset] = key; |
| 228 | locker.InsertBindlessSampler(buffer, offset, sampler); | 179 | locker->InsertBindlessSampler(buffer, offset, sampler); |
| 229 | } | 180 | } |
| 181 | return locker; | ||
| 230 | } | 182 | } |
| 231 | 183 | ||
| 232 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, | 184 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, |
| 233 | const ProgramCode& code, const ProgramCode& code_b, | 185 | u64 unique_identifier, const ShaderIR& ir, |
| 234 | ConstBufferLocker& locker, const ProgramVariant& variant, | 186 | bool hint_retrievable = false) { |
| 235 | bool hint_retrievable = false) { | 187 | LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); |
| 236 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); | 188 | const std::string glsl = DecompileShader(device, ir, shader_type); |
| 237 | |||
| 238 | const bool is_compute = shader_type == ShaderType::Compute; | ||
| 239 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 240 | const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); | ||
| 241 | std::optional<ShaderIR> ir_b; | ||
| 242 | if (!code_b.empty()) { | ||
| 243 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 244 | } | ||
| 245 | |||
| 246 | std::string source = fmt::format(R"(// {} | ||
| 247 | #version 430 core | ||
| 248 | #extension GL_ARB_separate_shader_objects : enable | ||
| 249 | )", | ||
| 250 | GetShaderId(unique_identifier, shader_type)); | ||
| 251 | if (device.HasShaderBallot()) { | ||
| 252 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 253 | } | ||
| 254 | if (device.HasVertexViewportLayer()) { | ||
| 255 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 256 | } | ||
| 257 | if (device.HasImageLoadFormatted()) { | ||
| 258 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 259 | } | ||
| 260 | if (device.HasWarpIntrinsics()) { | ||
| 261 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 262 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 263 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 264 | } | ||
| 265 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) | ||
| 266 | // on places where we don't want to. | ||
| 267 | // Thanks to Ryujinx for finding this workaround. | ||
| 268 | source += "#pragma optionNV(fastmath off)\n"; | ||
| 269 | |||
| 270 | if (shader_type == ShaderType::Geometry) { | ||
| 271 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); | ||
| 272 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); | ||
| 273 | source += fmt::format("layout ({}) in;\n", glsl_topology); | ||
| 274 | } | ||
| 275 | if (shader_type == ShaderType::Compute) { | ||
| 276 | if (variant.local_memory_size > 0) { | ||
| 277 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 278 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 279 | } | ||
| 280 | source += | ||
| 281 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | ||
| 282 | variant.block_x, variant.block_y, variant.block_z); | ||
| 283 | |||
| 284 | if (variant.shared_memory_size > 0) { | ||
| 285 | // shared_memory_size is described in number of words | ||
| 286 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | source += '\n'; | ||
| 291 | source += GenerateGLSL(device, shader_type, ir, ir_b); | ||
| 292 | |||
| 293 | OGLShader shader; | 189 | OGLShader shader; |
| 294 | shader.Create(source.c_str(), GetGLShaderType(shader_type)); | 190 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |
| 295 | 191 | ||
| 296 | auto program = std::make_shared<OGLProgram>(); | 192 | auto program = std::make_shared<OGLProgram>(); |
| 297 | program->Create(true, hint_retrievable, shader.handle); | 193 | program->Create(true, hint_retrievable, shader.handle); |
| @@ -299,7 +195,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 299 | } | 195 | } |
| 300 | 196 | ||
| 301 | std::unordered_set<GLenum> GetSupportedFormats() { | 197 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 302 | GLint num_formats{}; | 198 | GLint num_formats; |
| 303 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 199 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 304 | 200 | ||
| 305 | std::vector<GLint> formats(num_formats); | 201 | std::vector<GLint> formats(num_formats); |
| @@ -314,115 +210,81 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 314 | 210 | ||
| 315 | } // Anonymous namespace | 211 | } // Anonymous namespace |
| 316 | 212 | ||
| 317 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 213 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 318 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 214 | std::shared_ptr<VideoCommon::Shader::ConstBufferLocker> locker, |
| 319 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, | 215 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 320 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | 216 | : RasterizerCacheObject{host_ptr}, locker{std::move(locker)}, entries{std::move(entries)}, |
| 321 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, | 217 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 322 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | 218 | |
| 323 | if (!params.precompiled_variants) { | 219 | CachedShader::~CachedShader() = default; |
| 324 | return; | 220 | |
| 325 | } | 221 | GLuint CachedShader::GetHandle() const { |
| 326 | for (const auto& pair : *params.precompiled_variants) { | 222 | if (!locker->IsConsistent()) { |
| 327 | auto locker = MakeLocker(system, shader_type); | 223 | std::abort(); |
| 328 | const auto& usage = pair->first; | ||
| 329 | FillLocker(*locker, usage); | ||
| 330 | |||
| 331 | std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||
| 332 | const auto it = | ||
| 333 | std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||
| 334 | return variant->locker->HasEqualKeys(*locker); | ||
| 335 | }); | ||
| 336 | if (it == locker_variants.end()) { | ||
| 337 | locker_variant = &locker_variants.emplace_back(); | ||
| 338 | *locker_variant = std::make_unique<LockerVariant>(); | ||
| 339 | locker_variant->get()->locker = std::move(locker); | ||
| 340 | } else { | ||
| 341 | locker_variant = &*it; | ||
| 342 | } | ||
| 343 | locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||
| 344 | } | 224 | } |
| 225 | return program->handle; | ||
| 345 | } | 226 | } |
| 346 | 227 | ||
| 347 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 228 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 348 | Maxwell::ShaderProgram program_type, ProgramCode code, | 229 | Maxwell::ShaderProgram program_type, ProgramCode code, |
| 349 | ProgramCode code_b) { | 230 | ProgramCode code_b) { |
| 350 | const auto shader_type = GetShaderType(program_type); | 231 | const auto shader_type = GetShaderType(program_type); |
| 351 | params.disk_cache.SaveRaw( | 232 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 352 | ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); | ||
| 353 | 233 | ||
| 354 | ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); | 234 | auto locker = std::make_shared<ConstBufferLocker>(shader_type, params.system.GPU().Maxwell3D()); |
| 355 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 235 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *locker); |
| 356 | // TODO(Rodrigo): Handle VertexA shaders | 236 | // TODO(Rodrigo): Handle VertexA shaders |
| 357 | // std::optional<ShaderIR> ir_b; | 237 | // std::optional<ShaderIR> ir_b; |
| 358 | // if (!code_b.empty()) { | 238 | // if (!code_b.empty()) { |
| 359 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 239 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 360 | // } | 240 | // } |
| 361 | return std::shared_ptr<CachedShader>(new CachedShader( | 241 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); |
| 362 | params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); | 242 | |
| 243 | ShaderDiskCacheEntry entry; | ||
| 244 | entry.type = shader_type; | ||
| 245 | entry.code = std::move(code); | ||
| 246 | entry.code_b = std::move(code_b); | ||
| 247 | entry.unique_identifier = params.unique_identifier; | ||
| 248 | entry.bound_buffer = locker->GetBoundBuffer(); | ||
| 249 | entry.keys = locker->GetKeys(); | ||
| 250 | entry.bound_samplers = locker->GetBoundSamplers(); | ||
| 251 | entry.bindless_samplers = locker->GetBindlessSamplers(); | ||
| 252 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 253 | |||
| 254 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 255 | size_in_bytes, std::move(locker), | ||
| 256 | MakeEntries(ir), std::move(program))); | ||
| 363 | } | 257 | } |
| 364 | 258 | ||
| 365 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 259 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 366 | params.disk_cache.SaveRaw( | 260 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 367 | ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); | 261 | |
| 368 | 262 | auto locker = std::make_shared<ConstBufferLocker>(Tegra::Engines::ShaderType::Compute, | |
| 369 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, | 263 | params.system.GPU().KeplerCompute()); |
| 370 | params.system.GPU().KeplerCompute()); | 264 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *locker); |
| 371 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 265 | auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); |
| 372 | return std::shared_ptr<CachedShader>(new CachedShader( | 266 | |
| 373 | params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | 267 | ShaderDiskCacheEntry entry; |
| 268 | entry.type = ShaderType::Compute; | ||
| 269 | entry.code = std::move(code); | ||
| 270 | entry.unique_identifier = params.unique_identifier; | ||
| 271 | entry.bound_buffer = locker->GetBoundBuffer(); | ||
| 272 | entry.keys = locker->GetKeys(); | ||
| 273 | entry.bound_samplers = locker->GetBoundSamplers(); | ||
| 274 | entry.bindless_samplers = locker->GetBindlessSamplers(); | ||
| 275 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 276 | |||
| 277 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 278 | size_in_bytes, std::move(locker), | ||
| 279 | MakeEntries(ir), std::move(program))); | ||
| 374 | } | 280 | } |
| 375 | 281 | ||
| 376 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 282 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 377 | const UnspecializedShader& unspecialized) { | 283 | const PrecompiledShader& precompiled_shader, |
| 378 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, | 284 | std::size_t size_in_bytes) { |
| 379 | unspecialized.entries, unspecialized.code, | 285 | return std::shared_ptr<CachedShader>( |
| 380 | unspecialized.code_b)); | 286 | new CachedShader(params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.locker, |
| 381 | } | 287 | precompiled_shader.entries, precompiled_shader.program)); |
| 382 | |||
| 383 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { | ||
| 384 | EnsureValidLockerVariant(); | ||
| 385 | |||
| 386 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); | ||
| 387 | auto& program = entry->second; | ||
| 388 | if (!is_cache_miss) { | ||
| 389 | return program->handle; | ||
| 390 | } | ||
| 391 | |||
| 392 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, | ||
| 393 | *curr_locker_variant->locker, variant); | ||
| 394 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); | ||
| 395 | |||
| 396 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 397 | return program->handle; | ||
| 398 | } | ||
| 399 | |||
| 400 | bool CachedShader::EnsureValidLockerVariant() { | ||
| 401 | const auto previous_variant = curr_locker_variant; | ||
| 402 | if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { | ||
| 403 | curr_locker_variant = nullptr; | ||
| 404 | } | ||
| 405 | if (!curr_locker_variant) { | ||
| 406 | for (auto& variant : locker_variants) { | ||
| 407 | if (variant->locker->IsConsistent()) { | ||
| 408 | curr_locker_variant = variant.get(); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | } | ||
| 412 | if (!curr_locker_variant) { | ||
| 413 | auto& new_variant = locker_variants.emplace_back(); | ||
| 414 | new_variant = std::make_unique<LockerVariant>(); | ||
| 415 | new_variant->locker = MakeLocker(system, shader_type); | ||
| 416 | curr_locker_variant = new_variant.get(); | ||
| 417 | } | ||
| 418 | return previous_variant == curr_locker_variant; | ||
| 419 | } | ||
| 420 | |||
| 421 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||
| 422 | const ConstBufferLocker& locker) const { | ||
| 423 | return ShaderDiskCacheUsage{unique_identifier, variant, | ||
| 424 | locker.GetBoundBuffer(), locker.GetKeys(), | ||
| 425 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | ||
| 426 | } | 288 | } |
| 427 | 289 | ||
| 428 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 290 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -432,16 +294,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& | |||
| 432 | 294 | ||
| 433 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 295 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 434 | const VideoCore::DiskResourceLoadCallback& callback) { | 296 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 435 | const auto transferable = disk_cache.LoadTransferable(); | 297 | const std::optional transferable = disk_cache.LoadTransferable(); |
| 436 | if (!transferable) { | 298 | if (!transferable) { |
| 437 | return; | 299 | return; |
| 438 | } | 300 | } |
| 439 | const auto [raws, shader_usages] = *transferable; | ||
| 440 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | 301 | ||
| 444 | const auto dumps = disk_cache.LoadPrecompiled(); | 302 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); |
| 445 | const auto supported_formats = GetSupportedFormats(); | 303 | const auto supported_formats = GetSupportedFormats(); |
| 446 | 304 | ||
| 447 | // Track if precompiled cache was altered during loading to know if we have to | 305 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -450,77 +308,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 450 | 308 | ||
| 451 | // Inform the frontend about shader build initialization | 309 | // Inform the frontend about shader build initialization |
| 452 | if (callback) { | 310 | if (callback) { |
| 453 | callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); | 311 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); |
| 454 | } | 312 | } |
| 455 | 313 | ||
| 456 | std::mutex mutex; | 314 | std::mutex mutex; |
| 457 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex | 315 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex |
| 458 | std::atomic_bool compilation_failed = false; | 316 | std::atomic_bool gl_cache_failed = false; |
| 317 | |||
| 318 | const auto find_precompiled = [&gl_cache](u64 id) { | ||
| 319 | return std::find_if(gl_cache.begin(), gl_cache.end(), | ||
| 320 | [id](const auto& entry) { return entry.unique_identifier == id; }); | ||
| 321 | }; | ||
| 459 | 322 | ||
| 460 | const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | 323 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, |
| 461 | std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, | 324 | std::size_t end) { |
| 462 | const ShaderDumpsMap& dumps) { | ||
| 463 | context->MakeCurrent(); | 325 | context->MakeCurrent(); |
| 464 | SCOPE_EXIT({ return context->DoneCurrent(); }); | 326 | SCOPE_EXIT({ return context->DoneCurrent(); }); |
| 465 | 327 | ||
| 466 | for (std::size_t i = begin; i < end; ++i) { | 328 | for (std::size_t i = begin; i < end; ++i) { |
| 467 | if (stop_loading || compilation_failed) { | 329 | if (stop_loading) { |
| 468 | return; | 330 | return; |
| 469 | } | 331 | } |
| 470 | const auto& usage{shader_usages[i]}; | 332 | const auto& entry = (*transferable)[i]; |
| 471 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 333 | const u64 unique_identifier = entry.unique_identifier; |
| 472 | const auto dump{dumps.find(usage)}; | 334 | const auto it = find_precompiled(unique_identifier); |
| 473 | 335 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | |
| 474 | CachedProgram shader; | 336 | |
| 475 | if (dump != dumps.end()) { | 337 | const bool is_compute = entry.type == ShaderType::Compute; |
| 476 | // If the shader is dumped, attempt to load it with | 338 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 477 | shader = GeneratePrecompiledProgram(dump->second, supported_formats); | 339 | auto locker = MakeLocker(entry); |
| 478 | if (!shader) { | 340 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *locker); |
| 479 | compilation_failed = true; | 341 | |
| 480 | return; | 342 | std::shared_ptr<OGLProgram> program; |
| 343 | if (precompiled_entry) { | ||
| 344 | // If the shader is precompiled, attempt to load it with | ||
| 345 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 346 | if (!program) { | ||
| 347 | gl_cache_failed = true; | ||
| 481 | } | 348 | } |
| 482 | } | 349 | } |
| 483 | if (!shader) { | 350 | if (!program) { |
| 484 | auto locker{MakeLocker(system, unspecialized.type)}; | 351 | // Otherwise compile it from GLSL |
| 485 | FillLocker(*locker, usage); | 352 | program = BuildShader(device, entry.type, unique_identifier, ir, true); |
| 486 | |||
| 487 | shader = BuildShader(device, usage.unique_identifier, unspecialized.type, | ||
| 488 | unspecialized.code, unspecialized.code_b, *locker, | ||
| 489 | usage.variant, true); | ||
| 490 | } | 353 | } |
| 491 | 354 | ||
| 355 | PrecompiledShader shader; | ||
| 356 | shader.program = std::move(program); | ||
| 357 | shader.locker = std::move(locker); | ||
| 358 | shader.entries = MakeEntries(ir); | ||
| 359 | |||
| 492 | std::scoped_lock lock{mutex}; | 360 | std::scoped_lock lock{mutex}; |
| 493 | if (callback) { | 361 | if (callback) { |
| 494 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 362 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 495 | shader_usages.size()); | 363 | transferable->size()); |
| 496 | } | 364 | } |
| 497 | 365 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | |
| 498 | precompiled_programs.emplace(usage, std::move(shader)); | ||
| 499 | |||
| 500 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 501 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 502 | precompiled_programs.find(usage)); | ||
| 503 | } | 366 | } |
| 504 | }; | 367 | }; |
| 505 | 368 | ||
| 506 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; | 369 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 507 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 370 | const std::size_t bucket_size{transferable->size() / num_workers}; |
| 508 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 371 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 509 | std::vector<std::thread> threads(num_workers); | 372 | std::vector<std::thread> threads(num_workers); |
| 510 | for (std::size_t i = 0; i < num_workers; ++i) { | 373 | for (std::size_t i = 0; i < num_workers; ++i) { |
| 511 | const bool is_last_worker = i + 1 == num_workers; | 374 | const bool is_last_worker = i + 1 == num_workers; |
| 512 | const std::size_t start{bucket_size * i}; | 375 | const std::size_t start{bucket_size * i}; |
| 513 | const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; | 376 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; |
| 514 | 377 | ||
| 515 | // On some platforms the shared context has to be created from the GUI thread | 378 | // On some platforms the shared context has to be created from the GUI thread |
| 516 | contexts[i] = emu_window.CreateSharedContext(); | 379 | contexts[i] = emu_window.CreateSharedContext(); |
| 517 | threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); | 380 | threads[i] = std::thread(worker, contexts[i].get(), start, end); |
| 518 | } | 381 | } |
| 519 | for (auto& thread : threads) { | 382 | for (auto& thread : threads) { |
| 520 | thread.join(); | 383 | thread.join(); |
| 521 | } | 384 | } |
| 522 | 385 | ||
| 523 | if (compilation_failed) { | 386 | if (gl_cache_failed) { |
| 524 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 387 | // Invalidate the precompiled cache if a shader dumped shader was rejected |
| 525 | disk_cache.InvalidatePrecompiled(); | 388 | disk_cache.InvalidatePrecompiled(); |
| 526 | precompiled_cache_altered = true; | 389 | precompiled_cache_altered = true; |
| @@ -533,11 +396,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 533 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 396 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 534 | // before precompiling them | 397 | // before precompiling them |
| 535 | 398 | ||
| 536 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 399 | for (std::size_t i = 0; i < transferable->size(); ++i) { |
| 537 | const auto& usage{shader_usages[i]}; | 400 | const u64 id = (*transferable)[i].unique_identifier; |
| 538 | if (dumps.find(usage) == dumps.end()) { | 401 | const auto it = find_precompiled(id); |
| 539 | const auto& program{precompiled_programs.at(usage)}; | 402 | if (it == gl_cache.end()) { |
| 540 | disk_cache.SaveDump(usage, program->handle); | 403 | const GLuint program = runtime_cache.at(id).program->handle; |
| 404 | disk_cache.SavePrecompiled(id, program); | ||
| 541 | precompiled_cache_altered = true; | 405 | precompiled_cache_altered = true; |
| 542 | } | 406 | } |
| 543 | } | 407 | } |
| @@ -547,80 +411,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 547 | } | 411 | } |
| 548 | } | 412 | } |
| 549 | 413 | ||
| 550 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { | 414 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 551 | const auto it = precompiled_variants.find(unique_identifier); | 415 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 552 | return it == precompiled_variants.end() ? nullptr : &it->second; | 416 | const std::unordered_set<GLenum>& supported_formats) { |
| 553 | } | 417 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| 554 | 418 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | |
| 555 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 556 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 557 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | ||
| 558 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | ||
| 559 | return {}; | 419 | return {}; |
| 560 | } | 420 | } |
| 561 | 421 | ||
| 562 | CachedProgram shader = std::make_shared<OGLProgram>(); | 422 | auto program = std::make_shared<OGLProgram>(); |
| 563 | shader->handle = glCreateProgram(); | 423 | program->handle = glCreateProgram(); |
| 564 | glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 424 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 565 | glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), | 425 | glProgramBinary(program->handle, precompiled_entry.binary_format, |
| 566 | static_cast<GLsizei>(dump.binary.size())); | 426 | precompiled_entry.binary.data(), |
| 567 | 427 | static_cast<GLsizei>(precompiled_entry.binary.size())); | |
| 568 | GLint link_status{}; | 428 | |
| 569 | glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); | 429 | GLint link_status; |
| 430 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | ||
| 570 | if (link_status == GL_FALSE) { | 431 | if (link_status == GL_FALSE) { |
| 571 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); | 432 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 572 | return {}; | 433 | return {}; |
| 573 | } | 434 | } |
| 574 | 435 | ||
| 575 | return shader; | 436 | return program; |
| 576 | } | ||
| 577 | |||
| 578 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | ||
| 579 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | ||
| 580 | const std::vector<ShaderDiskCacheRaw>& raws) { | ||
| 581 | if (callback) { | ||
| 582 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | ||
| 583 | } | ||
| 584 | |||
| 585 | for (std::size_t i = 0; i < raws.size(); ++i) { | ||
| 586 | if (stop_loading) { | ||
| 587 | return false; | ||
| 588 | } | ||
| 589 | const auto& raw{raws[i]}; | ||
| 590 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | ||
| 591 | const u64 calculated_hash{ | ||
| 592 | GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; | ||
| 593 | if (unique_identifier != calculated_hash) { | ||
| 594 | LOG_ERROR(Render_OpenGL, | ||
| 595 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | ||
| 596 | "removing shader cache", | ||
| 597 | raw.GetUniqueIdentifier(), calculated_hash); | ||
| 598 | disk_cache.InvalidateTransferable(); | ||
| 599 | return false; | ||
| 600 | } | ||
| 601 | |||
| 602 | const u32 main_offset = | ||
| 603 | raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 604 | ConstBufferLocker locker(raw.GetType()); | ||
| 605 | const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); | ||
| 606 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 607 | // std::optional<ShaderIR> ir_b; | ||
| 608 | // if (raw.HasProgramA()) { | ||
| 609 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); | ||
| 610 | // } | ||
| 611 | |||
| 612 | UnspecializedShader unspecialized; | ||
| 613 | unspecialized.entries = GLShader::GetEntries(ir); | ||
| 614 | unspecialized.type = raw.GetType(); | ||
| 615 | unspecialized.code = raw.GetCode(); | ||
| 616 | unspecialized.code_b = raw.GetCodeB(); | ||
| 617 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | ||
| 618 | |||
| 619 | if (callback) { | ||
| 620 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | ||
| 621 | } | ||
| 622 | } | ||
| 623 | return true; | ||
| 624 | } | 437 | } |
| 625 | 438 | ||
| 626 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 439 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| @@ -648,17 +461,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 648 | 461 | ||
| 649 | const auto unique_identifier = GetUniqueIdentifier( | 462 | const auto unique_identifier = GetUniqueIdentifier( |
| 650 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 463 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 651 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 652 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 464 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 653 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 465 | const ShaderParameters params{system, disk_cache, device, |
| 654 | cpu_addr, host_ptr, unique_identifier}; | 466 | cpu_addr, host_ptr, unique_identifier}; |
| 655 | 467 | ||
| 656 | const auto found = unspecialized_shaders.find(unique_identifier); | 468 | const auto found = runtime_cache.find(unique_identifier); |
| 657 | if (found == unspecialized_shaders.end()) { | 469 | if (found == runtime_cache.end()) { |
| 658 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 470 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 659 | std::move(code_b)); | 471 | std::move(code_b)); |
| 660 | } else { | 472 | } else { |
| 661 | shader = CachedShader::CreateFromCache(params, found->second); | 473 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 474 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 662 | } | 475 | } |
| 663 | Register(shader); | 476 | Register(shader); |
| 664 | 477 | ||
| @@ -673,19 +486,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 673 | return kernel; | 486 | return kernel; |
| 674 | } | 487 | } |
| 675 | 488 | ||
| 676 | // No kernel found - create a new one | 489 | // No kernel found, create a new one |
| 677 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 490 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 678 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; | 491 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 679 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 680 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 492 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 681 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 493 | const ShaderParameters params{system, disk_cache, device, |
| 682 | cpu_addr, host_ptr, unique_identifier}; | 494 | cpu_addr, host_ptr, unique_identifier}; |
| 683 | 495 | ||
| 684 | const auto found = unspecialized_shaders.find(unique_identifier); | 496 | const auto found = runtime_cache.find(unique_identifier); |
| 685 | if (found == unspecialized_shaders.end()) { | 497 | if (found == runtime_cache.end()) { |
| 686 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 498 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 687 | } else { | 499 | } else { |
| 688 | kernel = CachedShader::CreateFromCache(params, found->second); | 500 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 501 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 689 | } | 502 | } |
| 690 | 503 | ||
| 691 | Register(kernel); | 504 | Register(kernel); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7b1470db3..03d7a2b3f 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -41,22 +41,17 @@ class RasterizerOpenGL; | |||
| 41 | struct UnspecializedShader; | 41 | struct UnspecializedShader; |
| 42 | 42 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using CachedProgram = std::shared_ptr<OGLProgram>; | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 45 | |
| 47 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | 46 | struct PrecompiledShader { |
| 48 | 47 | std::shared_ptr<OGLProgram> program; | |
| 49 | struct UnspecializedShader { | 48 | std::shared_ptr<VideoCommon::Shader::ConstBufferLocker> locker; |
| 50 | GLShader::ShaderEntries entries; | 49 | ShaderEntries entries; |
| 51 | Tegra::Engines::ShaderType type; | ||
| 52 | ProgramCode code; | ||
| 53 | ProgramCode code_b; | ||
| 54 | }; | 50 | }; |
| 55 | 51 | ||
| 56 | struct ShaderParameters { | 52 | struct ShaderParameters { |
| 57 | Core::System& system; | 53 | Core::System& system; |
| 58 | ShaderDiskCacheOpenGL& disk_cache; | 54 | ShaderDiskCacheOpenGL& disk_cache; |
| 59 | const PrecompiledVariants* precompiled_variants; | ||
| 60 | const Device& device; | 55 | const Device& device; |
| 61 | VAddr cpu_addr; | 56 | VAddr cpu_addr; |
| 62 | u8* host_ptr; | 57 | u8* host_ptr; |
| @@ -65,61 +60,45 @@ struct ShaderParameters { | |||
| 65 | 60 | ||
| 66 | class CachedShader final : public RasterizerCacheObject { | 61 | class CachedShader final : public RasterizerCacheObject { |
| 67 | public: | 62 | public: |
| 68 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 63 | ~CachedShader(); |
| 69 | Maxwell::ShaderProgram program_type, | ||
| 70 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 71 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||
| 72 | 64 | ||
| 73 | static Shader CreateFromCache(const ShaderParameters& params, | 65 | /// Gets the GL program handle for the shader |
| 74 | const UnspecializedShader& unspecialized); | 66 | GLuint GetHandle() const; |
| 75 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 76 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 77 | return cpu_addr; | 70 | return cpu_addr; |
| 78 | } | 71 | } |
| 79 | 72 | ||
| 73 | /// Returns the size in bytes of the shader | ||
| 80 | std::size_t GetSizeInBytes() const override { | 74 | std::size_t GetSizeInBytes() const override { |
| 81 | return code.size() * sizeof(u64); | 75 | return size_in_bytes; |
| 82 | } | 76 | } |
| 83 | 77 | ||
| 84 | /// Gets the shader entries for the shader | 78 | /// Gets the shader entries for the shader |
| 85 | const GLShader::ShaderEntries& GetShaderEntries() const { | 79 | const ShaderEntries& GetEntries() const { |
| 86 | return entries; | 80 | return entries; |
| 87 | } | 81 | } |
| 88 | 82 | ||
| 89 | /// Gets the GL program handle for the shader | 83 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 90 | GLuint GetHandle(const ProgramVariant& variant); | 84 | Maxwell::ShaderProgram program_type, |
| 91 | 85 | ProgramCode program_code, ProgramCode program_code_b); | |
| 92 | private: | 86 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); |
| 93 | struct LockerVariant { | ||
| 94 | std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 95 | std::unordered_map<ProgramVariant, CachedProgram> programs; | ||
| 96 | }; | ||
| 97 | |||
| 98 | explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, | ||
| 99 | GLShader::ShaderEntries entries, ProgramCode program_code, | ||
| 100 | ProgramCode program_code_b); | ||
| 101 | |||
| 102 | bool EnsureValidLockerVariant(); | ||
| 103 | |||
| 104 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | ||
| 105 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||
| 106 | |||
| 107 | Core::System& system; | ||
| 108 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 109 | const Device& device; | ||
| 110 | |||
| 111 | VAddr cpu_addr{}; | ||
| 112 | |||
| 113 | u64 unique_identifier{}; | ||
| 114 | Tegra::Engines::ShaderType shader_type{}; | ||
| 115 | |||
| 116 | GLShader::ShaderEntries entries; | ||
| 117 | 87 | ||
| 118 | ProgramCode code; | 88 | static Shader CreateFromCache(const ShaderParameters& params, |
| 119 | ProgramCode code_b; | 89 | const PrecompiledShader& precompiled_shader, |
| 90 | std::size_t size_in_bytes); | ||
| 120 | 91 | ||
| 121 | LockerVariant* curr_locker_variant = nullptr; | 92 | private: |
| 122 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::ConstBufferLocker> locker, | ||
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||
| 96 | |||
| 97 | std::shared_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 98 | ShaderEntries entries; | ||
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | ||
| 101 | std::shared_ptr<OGLProgram> program; | ||
| 123 | }; | 102 | }; |
| 124 | 103 | ||
| 125 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 104 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -142,25 +121,15 @@ protected: | |||
| 142 | void FlushObjectInner(const Shader& object) override {} | 121 | void FlushObjectInner(const Shader& object) override {} |
| 143 | 122 | ||
| 144 | private: | 123 | private: |
| 145 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, | 124 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( |
| 146 | const VideoCore::DiskResourceLoadCallback& callback, | 125 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 147 | const std::vector<ShaderDiskCacheRaw>& raws); | 126 | const std::unordered_set<GLenum>& supported_formats); |
| 148 | |||
| 149 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||
| 150 | const std::unordered_set<GLenum>& supported_formats); | ||
| 151 | |||
| 152 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 153 | 127 | ||
| 154 | Core::System& system; | 128 | Core::System& system; |
| 155 | Core::Frontend::EmuWindow& emu_window; | 129 | Core::Frontend::EmuWindow& emu_window; |
| 156 | const Device& device; | 130 | const Device& device; |
| 157 | |||
| 158 | ShaderDiskCacheOpenGL disk_cache; | 131 | ShaderDiskCacheOpenGL disk_cache; |
| 159 | 132 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | |
| 160 | PrecompiledPrograms precompiled_programs; | ||
| 161 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 162 | |||
| 163 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 164 | 133 | ||
| 165 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 134 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 166 | }; | 135 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3a41ed30c..308e57aae 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include "video_core/shader/node.h" | 24 | #include "video_core/shader/node.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | 26 | ||
| 27 | namespace OpenGL::GLShader { | 27 | namespace OpenGL { |
| 28 | 28 | ||
| 29 | namespace { | 29 | namespace { |
| 30 | 30 | ||
| @@ -56,6 +56,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 56 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 56 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 57 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 57 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 58 | 58 | ||
| 59 | std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | ||
| 60 | #define ftou floatBitsToUint | ||
| 61 | #define itof intBitsToFloat | ||
| 62 | #define utof uintBitsToFloat | ||
| 63 | |||
| 64 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 65 | bvec2 is_nan1 = isnan(pair1); | ||
| 66 | bvec2 is_nan2 = isnan(pair2); | ||
| 67 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 68 | }} | ||
| 69 | |||
| 70 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 71 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 72 | |||
| 73 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 74 | float y_direction; | ||
| 75 | }}; | ||
| 76 | )"; | ||
| 77 | |||
| 59 | class ShaderWriter final { | 78 | class ShaderWriter final { |
| 60 | public: | 79 | public: |
| 61 | void AddExpression(std::string_view text) { | 80 | void AddExpression(std::string_view text) { |
| @@ -270,11 +289,16 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | |||
| 270 | } | 289 | } |
| 271 | 290 | ||
| 272 | /// Generates code to use for a swizzle operation. | 291 | /// Generates code to use for a swizzle operation. |
| 273 | constexpr const char* GetSwizzle(u32 element) { | 292 | constexpr const char* GetSwizzle(std::size_t element) { |
| 274 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | 293 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| 275 | return swizzle.at(element); | 294 | return swizzle.at(element); |
| 276 | } | 295 | } |
| 277 | 296 | ||
| 297 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 298 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 299 | return swizzle.at(element); | ||
| 300 | } | ||
| 301 | |||
| 278 | /// Translate topology | 302 | /// Translate topology |
| 279 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 303 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 280 | switch (topology) { | 304 | switch (topology) { |
| @@ -344,9 +368,48 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 344 | class GLSLDecompiler final { | 368 | class GLSLDecompiler final { |
| 345 | public: | 369 | public: |
| 346 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 370 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, |
| 347 | std::string suffix) | 371 | std::string_view suffix) |
| 348 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 372 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 349 | 373 | ||
| 374 | void Decompile() { | ||
| 375 | DeclareHeader(); | ||
| 376 | DeclareVertex(); | ||
| 377 | DeclareGeometry(); | ||
| 378 | DeclareFragment(); | ||
| 379 | DeclareRegisters(); | ||
| 380 | DeclareCustomVariables(); | ||
| 381 | DeclarePredicates(); | ||
| 382 | DeclareLocalMemory(); | ||
| 383 | DeclareInternalFlags(); | ||
| 384 | DeclareInputAttributes(); | ||
| 385 | DeclareOutputAttributes(); | ||
| 386 | DeclareConstantBuffers(); | ||
| 387 | DeclareGlobalMemory(); | ||
| 388 | DeclareSamplers(); | ||
| 389 | DeclareImages(); | ||
| 390 | DeclarePhysicalAttributeReader(); | ||
| 391 | |||
| 392 | code.AddLine("void main() {{"); | ||
| 393 | ++code.scope; | ||
| 394 | |||
| 395 | if (ir.IsDecompiled()) { | ||
| 396 | DecompileAST(); | ||
| 397 | } else { | ||
| 398 | DecompileBranchMode(); | ||
| 399 | } | ||
| 400 | |||
| 401 | --code.scope; | ||
| 402 | code.AddLine("}}"); | ||
| 403 | } | ||
| 404 | |||
| 405 | std::string GetResult() { | ||
| 406 | return code.GetResult(); | ||
| 407 | } | ||
| 408 | |||
| 409 | private: | ||
| 410 | friend class ASTDecompiler; | ||
| 411 | friend class ExprDecompiler; | ||
| 412 | |||
| 350 | void DecompileBranchMode() { | 413 | void DecompileBranchMode() { |
| 351 | // VM's program counter | 414 | // VM's program counter |
| 352 | const auto first_address = ir.GetBasicBlocks().begin()->first; | 415 | const auto first_address = ir.GetBasicBlocks().begin()->first; |
| @@ -387,43 +450,33 @@ public: | |||
| 387 | 450 | ||
| 388 | void DecompileAST(); | 451 | void DecompileAST(); |
| 389 | 452 | ||
| 390 | void Decompile() { | 453 | void DeclareHeader() { |
| 391 | DeclareVertex(); | 454 | code.AddLine("#version 450 compatibility"); |
| 392 | DeclareGeometry(); | 455 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 393 | DeclareRegisters(); | 456 | if (device.HasShaderBallot()) { |
| 394 | DeclareCustomVariables(); | 457 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| 395 | DeclarePredicates(); | 458 | } |
| 396 | DeclareLocalMemory(); | 459 | if (device.HasVertexViewportLayer()) { |
| 397 | DeclareInternalFlags(); | 460 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); |
| 398 | DeclareInputAttributes(); | 461 | } |
| 399 | DeclareOutputAttributes(); | 462 | if (device.HasImageLoadFormatted()) { |
| 400 | DeclareConstantBuffers(); | 463 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); |
| 401 | DeclareGlobalMemory(); | 464 | } |
| 402 | DeclareSamplers(); | 465 | if (device.HasWarpIntrinsics()) { |
| 403 | DeclareImages(); | 466 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); |
| 404 | DeclarePhysicalAttributeReader(); | 467 | code.AddLine("#extension GL_NV_shader_thread_group : require"); |
| 405 | 468 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | |
| 406 | code.AddLine("void execute_{}() {{", suffix); | ||
| 407 | ++code.scope; | ||
| 408 | |||
| 409 | if (ir.IsDecompiled()) { | ||
| 410 | DecompileAST(); | ||
| 411 | } else { | ||
| 412 | DecompileBranchMode(); | ||
| 413 | } | 469 | } |
| 470 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 | ||
| 471 | // operations) on places where we don't want to. | ||
| 472 | // Thanks to Ryujinx for finding this workaround. | ||
| 473 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 414 | 474 | ||
| 415 | --code.scope; | 475 | code.AddNewLine(); |
| 416 | code.AddLine("}}"); | ||
| 417 | } | ||
| 418 | 476 | ||
| 419 | std::string GetResult() { | 477 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); |
| 420 | return code.GetResult(); | ||
| 421 | } | 478 | } |
| 422 | 479 | ||
| 423 | private: | ||
| 424 | friend class ASTDecompiler; | ||
| 425 | friend class ExprDecompiler; | ||
| 426 | |||
| 427 | void DeclareVertex() { | 480 | void DeclareVertex() { |
| 428 | if (!IsVertexShader(stage)) | 481 | if (!IsVertexShader(stage)) |
| 429 | return; | 482 | return; |
| @@ -450,6 +503,24 @@ private: | |||
| 450 | DeclareVertexRedeclarations(); | 503 | DeclareVertexRedeclarations(); |
| 451 | } | 504 | } |
| 452 | 505 | ||
| 506 | void DeclareFragment() { | ||
| 507 | if (stage != ShaderType::Fragment) { | ||
| 508 | return; | ||
| 509 | } | ||
| 510 | |||
| 511 | bool any = false; | ||
| 512 | for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { | ||
| 513 | if (!IsRenderTargetEnabled(render_target)) { | ||
| 514 | continue; | ||
| 515 | } | ||
| 516 | code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); | ||
| 517 | any = true; | ||
| 518 | } | ||
| 519 | if (any) { | ||
| 520 | code.AddNewLine(); | ||
| 521 | } | ||
| 522 | } | ||
| 523 | |||
| 453 | void DeclareVertexRedeclarations() { | 524 | void DeclareVertexRedeclarations() { |
| 454 | code.AddLine("out gl_PerVertex {{"); | 525 | code.AddLine("out gl_PerVertex {{"); |
| 455 | ++code.scope; | 526 | ++code.scope; |
| @@ -1945,7 +2016,7 @@ private: | |||
| 1945 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 2016 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1946 | for (u32 component = 0; component < 4; ++component) { | 2017 | for (u32 component = 0; component < 4; ++component) { |
| 1947 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 2018 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1948 | code.AddLine("FragColor{}[{}] = {};", render_target, component, | 2019 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), |
| 1949 | SafeGetRegister(current_reg).AsFloat()); | 2020 | SafeGetRegister(current_reg).AsFloat()); |
| 1950 | ++current_reg; | 2021 | ++current_reg; |
| 1951 | } | 2022 | } |
| @@ -2298,7 +2369,11 @@ private: | |||
| 2298 | } | 2369 | } |
| 2299 | 2370 | ||
| 2300 | std::string GetLocalMemory() const { | 2371 | std::string GetLocalMemory() const { |
| 2301 | return "lmem_" + suffix; | 2372 | if (suffix.empty()) { |
| 2373 | return "lmem"; | ||
| 2374 | } else { | ||
| 2375 | return "lmem_" + std::string{suffix}; | ||
| 2376 | } | ||
| 2302 | } | 2377 | } |
| 2303 | 2378 | ||
| 2304 | std::string GetInternalFlag(InternalFlag flag) const { | 2379 | std::string GetInternalFlag(InternalFlag flag) const { |
| @@ -2307,7 +2382,11 @@ private: | |||
| 2307 | const auto index = static_cast<u32>(flag); | 2382 | const auto index = static_cast<u32>(flag); |
| 2308 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 2383 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); |
| 2309 | 2384 | ||
| 2310 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | 2385 | if (suffix.empty()) { |
| 2386 | return InternalFlagNames[index]; | ||
| 2387 | } else { | ||
| 2388 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2389 | } | ||
| 2311 | } | 2390 | } |
| 2312 | 2391 | ||
| 2313 | std::string GetSampler(const Sampler& sampler) const { | 2392 | std::string GetSampler(const Sampler& sampler) const { |
| @@ -2319,7 +2398,11 @@ private: | |||
| 2319 | } | 2398 | } |
| 2320 | 2399 | ||
| 2321 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2400 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { |
| 2322 | return fmt::format("{}_{}_{}", name, index, suffix); | 2401 | if (suffix.empty()) { |
| 2402 | return fmt::format("{}{}", name, index); | ||
| 2403 | } else { | ||
| 2404 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2405 | } | ||
| 2323 | } | 2406 | } |
| 2324 | 2407 | ||
| 2325 | u32 GetNumPhysicalInputAttributes() const { | 2408 | u32 GetNumPhysicalInputAttributes() const { |
| @@ -2334,17 +2417,26 @@ private: | |||
| 2334 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | 2417 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); |
| 2335 | } | 2418 | } |
| 2336 | 2419 | ||
| 2420 | bool IsRenderTargetEnabled(u32 render_target) const { | ||
| 2421 | for (u32 component = 0; component < 4; ++component) { | ||
| 2422 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2423 | return true; | ||
| 2424 | } | ||
| 2425 | } | ||
| 2426 | return false; | ||
| 2427 | } | ||
| 2428 | |||
| 2337 | const Device& device; | 2429 | const Device& device; |
| 2338 | const ShaderIR& ir; | 2430 | const ShaderIR& ir; |
| 2339 | const ShaderType stage; | 2431 | const ShaderType stage; |
| 2340 | const std::string suffix; | 2432 | const std::string_view suffix; |
| 2341 | const Header header; | 2433 | const Header header; |
| 2342 | 2434 | ||
| 2343 | ShaderWriter code; | 2435 | ShaderWriter code; |
| 2344 | }; | 2436 | }; |
| 2345 | 2437 | ||
| 2346 | std::string GetFlowVariable(u32 i) { | 2438 | std::string GetFlowVariable(u32 index) { |
| 2347 | return fmt::format("flow_var_{}", i); | 2439 | return fmt::format("flow_var{}", index); |
| 2348 | } | 2440 | } |
| 2349 | 2441 | ||
| 2350 | class ExprDecompiler { | 2442 | class ExprDecompiler { |
| @@ -2531,7 +2623,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2531 | 2623 | ||
| 2532 | } // Anonymous namespace | 2624 | } // Anonymous namespace |
| 2533 | 2625 | ||
| 2534 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2626 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { |
| 2535 | ShaderEntries entries; | 2627 | ShaderEntries entries; |
| 2536 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2628 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2537 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2629 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2555,28 +2647,11 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2555 | return entries; | 2647 | return entries; |
| 2556 | } | 2648 | } |
| 2557 | 2649 | ||
| 2558 | std::string GetCommonDeclarations() { | 2650 | std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, |
| 2559 | return R"(#define ftoi floatBitsToInt | 2651 | std::string_view suffix) { |
| 2560 | #define ftou floatBitsToUint | ||
| 2561 | #define itof intBitsToFloat | ||
| 2562 | #define utof uintBitsToFloat | ||
| 2563 | |||
| 2564 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | ||
| 2565 | bvec2 is_nan1 = isnan(pair1); | ||
| 2566 | bvec2 is_nan2 = isnan(pair2); | ||
| 2567 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2571 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2572 | )"; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, | ||
| 2576 | const std::string& suffix) { | ||
| 2577 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2652 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 2578 | decompiler.Decompile(); | 2653 | decompiler.Decompile(); |
| 2579 | return decompiler.GetResult(); | 2654 | return decompiler.GetResult(); |
| 2580 | } | 2655 | } |
| 2581 | 2656 | ||
| 2582 | } // namespace OpenGL::GLShader | 2657 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 0f692c1db..ae97ab504 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <string_view> | ||
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -18,10 +19,8 @@ class ShaderIR; | |||
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | namespace OpenGL { | 21 | namespace OpenGL { |
| 21 | class Device; | ||
| 22 | } | ||
| 23 | 22 | ||
| 24 | namespace OpenGL::GLShader { | 23 | class Device; |
| 25 | 24 | ||
| 26 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 26 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| @@ -78,11 +77,9 @@ struct ShaderEntries { | |||
| 78 | std::size_t shader_length{}; | 77 | std::size_t shader_length{}; |
| 79 | }; | 78 | }; |
| 80 | 79 | ||
| 81 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | 80 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 82 | |||
| 83 | std::string GetCommonDeclarations(); | ||
| 84 | 81 | ||
| 85 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 82 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 86 | Tegra::Engines::ShaderType stage, const std::string& suffix); | 83 | Tegra::Engines::ShaderType stage, std::string_view suffix = {}); |
| 87 | 84 | ||
| 88 | } // namespace OpenGL::GLShader | 85 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 1fc204f6f..0e1717c5e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -31,32 +31,24 @@ namespace { | |||
| 31 | 31 | ||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 32 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 33 | ||
| 34 | enum class TransferableEntryKind : u32 { | ||
| 35 | Raw, | ||
| 36 | Usage, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct ConstBufferKey { | 34 | struct ConstBufferKey { |
| 40 | u32 cbuf{}; | 35 | u32 cbuf = 0; |
| 41 | u32 offset{}; | 36 | u32 offset = 0; |
| 42 | u32 value{}; | 37 | u32 value = 0; |
| 43 | }; | 38 | }; |
| 44 | 39 | ||
| 45 | struct BoundSamplerKey { | 40 | struct BoundSamplerKey { |
| 46 | u32 offset{}; | 41 | u32 offset = 0; |
| 47 | Tegra::Engines::SamplerDescriptor sampler{}; | 42 | Tegra::Engines::SamplerDescriptor sampler; |
| 48 | }; | 43 | }; |
| 49 | 44 | ||
| 50 | struct BindlessSamplerKey { | 45 | struct BindlessSamplerKey { |
| 51 | u32 cbuf{}; | 46 | u32 cbuf = 0; |
| 52 | u32 offset{}; | 47 | u32 offset = 0; |
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 48 | Tegra::Engines::SamplerDescriptor sampler; |
| 54 | }; | 49 | }; |
| 55 | 50 | ||
| 56 | constexpr u32 NativeVersion = 12; | 51 | constexpr u32 NativeVersion = 16; |
| 57 | |||
| 58 | // Making sure sizes doesn't change by accident | ||
| 59 | static_assert(sizeof(ProgramVariant) == 20); | ||
| 60 | 52 | ||
| 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 62 | ShaderCacheVersionHash hash{}; | 54 | ShaderCacheVersionHash hash{}; |
| @@ -67,61 +59,122 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 67 | 59 | ||
| 68 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 69 | 61 | ||
| 70 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, | 62 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; |
| 71 | ProgramCode code_b) | ||
| 72 | : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( | ||
| 73 | code_b)} {} | ||
| 74 | 63 | ||
| 75 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 64 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; |
| 76 | 65 | ||
| 77 | ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; | 66 | bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { |
| 78 | 67 | if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | |
| 79 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | ||
| 80 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || | ||
| 81 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | ||
| 82 | return false; | 68 | return false; |
| 83 | } | 69 | } |
| 84 | u32 code_size{}; | 70 | u32 code_size; |
| 85 | u32 code_size_b{}; | 71 | u32 code_size_b; |
| 86 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || | 72 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || |
| 87 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { | 73 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { |
| 88 | return false; | 74 | return false; |
| 89 | } | 75 | } |
| 90 | |||
| 91 | code.resize(code_size); | 76 | code.resize(code_size); |
| 92 | code_b.resize(code_size_b); | 77 | code_b.resize(code_size_b); |
| 93 | 78 | ||
| 94 | if (file.ReadArray(code.data(), code_size) != code_size) | 79 | if (file.ReadArray(code.data(), code_size) != code_size) { |
| 95 | return false; | 80 | return false; |
| 96 | 81 | } | |
| 97 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { | 82 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { |
| 98 | return false; | 83 | return false; |
| 99 | } | 84 | } |
| 85 | |||
| 86 | bool is_texture_handler_size_known; | ||
| 87 | u32 texture_handler_size_value; | ||
| 88 | u32 num_keys; | ||
| 89 | u32 num_bound_samplers; | ||
| 90 | u32 num_bindless_samplers; | ||
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | ||
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | ||
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 94 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 95 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 96 | return false; | ||
| 97 | } | ||
| 98 | if (is_texture_handler_size_known) { | ||
| 99 | texture_handler_size = texture_handler_size_value; | ||
| 100 | } | ||
| 101 | |||
| 102 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 103 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | ||
| 104 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | ||
| 105 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | ||
| 106 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | ||
| 107 | flat_bound_samplers.size() || | ||
| 108 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | ||
| 109 | flat_bindless_samplers.size()) { | ||
| 110 | return false; | ||
| 111 | } | ||
| 112 | for (const auto& key : flat_keys) { | ||
| 113 | keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 114 | } | ||
| 115 | for (const auto& key : flat_bound_samplers) { | ||
| 116 | bound_samplers.emplace(key.offset, key.sampler); | ||
| 117 | } | ||
| 118 | for (const auto& key : flat_bindless_samplers) { | ||
| 119 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 120 | } | ||
| 121 | |||
| 100 | return true; | 122 | return true; |
| 101 | } | 123 | } |
| 102 | 124 | ||
| 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 125 | bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { |
| 104 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || | 126 | if (file.WriteObject(static_cast<u32>(type)) != 1 || |
| 105 | file.WriteObject(static_cast<u32>(code.size())) != 1 || | 127 | file.WriteObject(static_cast<u32>(code.size())) != 1 || |
| 106 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { | 128 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { |
| 107 | return false; | 129 | return false; |
| 108 | } | 130 | } |
| 109 | 131 | if (file.WriteArray(code.data(), code.size()) != code.size()) { | |
| 110 | if (file.WriteArray(code.data(), code.size()) != code.size()) | ||
| 111 | return false; | 132 | return false; |
| 112 | 133 | } | |
| 113 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { | 134 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { |
| 114 | return false; | 135 | return false; |
| 115 | } | 136 | } |
| 116 | return true; | 137 | |
| 138 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | ||
| 139 | file.WriteObject(texture_handler_size.has_value()) != 1 || | ||
| 140 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || | ||
| 141 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | ||
| 142 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | ||
| 143 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | ||
| 144 | return false; | ||
| 145 | } | ||
| 146 | |||
| 147 | std::vector<ConstBufferKey> flat_keys; | ||
| 148 | flat_keys.reserve(keys.size()); | ||
| 149 | for (const auto& [address, value] : keys) { | ||
| 150 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 151 | } | ||
| 152 | |||
| 153 | std::vector<BoundSamplerKey> flat_bound_samplers; | ||
| 154 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 155 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 156 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | ||
| 157 | } | ||
| 158 | |||
| 159 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | ||
| 160 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 161 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 162 | flat_bindless_samplers.push_back( | ||
| 163 | BindlessSamplerKey{address.first, address.second, sampler}); | ||
| 164 | } | ||
| 165 | |||
| 166 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | ||
| 167 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | ||
| 168 | flat_bound_samplers.size() && | ||
| 169 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | ||
| 170 | flat_bindless_samplers.size(); | ||
| 117 | } | 171 | } |
| 118 | 172 | ||
| 119 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} | 173 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} |
| 120 | 174 | ||
| 121 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | 175 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; |
| 122 | 176 | ||
| 123 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 177 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { |
| 124 | ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 125 | // Skip games without title id | 178 | // Skip games without title id |
| 126 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; | 179 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; |
| 127 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { | 180 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { |
| @@ -130,17 +183,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 130 | 183 | ||
| 131 | FileUtil::IOFile file(GetTransferablePath(), "rb"); | 184 | FileUtil::IOFile file(GetTransferablePath(), "rb"); |
| 132 | if (!file.IsOpen()) { | 185 | if (!file.IsOpen()) { |
| 133 | LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", | 186 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); |
| 134 | GetTitleID()); | ||
| 135 | is_usable = true; | 187 | is_usable = true; |
| 136 | return {}; | 188 | return {}; |
| 137 | } | 189 | } |
| 138 | 190 | ||
| 139 | u32 version{}; | 191 | u32 version{}; |
| 140 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | 192 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |
| 141 | LOG_ERROR(Render_OpenGL, | 193 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); |
| 142 | "Failed to get transferable cache version for title id={}, skipping", | ||
| 143 | GetTitleID()); | ||
| 144 | return {}; | 194 | return {}; |
| 145 | } | 195 | } |
| 146 | 196 | ||
| @@ -158,105 +208,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 158 | } | 208 | } |
| 159 | 209 | ||
| 160 | // Version is valid, load the shaders | 210 | // Version is valid, load the shaders |
| 161 | constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | 211 | std::vector<ShaderDiskCacheEntry> entries; |
| 162 | std::vector<ShaderDiskCacheRaw> raws; | ||
| 163 | std::vector<ShaderDiskCacheUsage> usages; | ||
| 164 | while (file.Tell() < file.GetSize()) { | 212 | while (file.Tell() < file.GetSize()) { |
| 165 | TransferableEntryKind kind{}; | 213 | ShaderDiskCacheEntry& entry = entries.emplace_back(); |
| 166 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | 214 | if (!entry.Load(file)) { |
| 167 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); | 215 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); |
| 168 | return {}; | ||
| 169 | } | ||
| 170 | |||
| 171 | switch (kind) { | ||
| 172 | case TransferableEntryKind::Raw: { | ||
| 173 | ShaderDiskCacheRaw entry; | ||
| 174 | if (!entry.Load(file)) { | ||
| 175 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | transferable.insert({entry.GetUniqueIdentifier(), {}}); | ||
| 179 | raws.push_back(std::move(entry)); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case TransferableEntryKind::Usage: { | ||
| 183 | ShaderDiskCacheUsage usage; | ||
| 184 | |||
| 185 | u32 num_keys{}; | ||
| 186 | u32 num_bound_samplers{}; | ||
| 187 | u32 num_bindless_samplers{}; | ||
| 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||
| 189 | file.ReadArray(&usage.variant, 1) != 1 || | ||
| 190 | file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 191 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 192 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 193 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | |||
| 197 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 198 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 199 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 200 | if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||
| 201 | file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||
| 202 | bound_samplers.size() || | ||
| 203 | file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 204 | bindless_samplers.size()) { | ||
| 205 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 206 | return {}; | ||
| 207 | } | ||
| 208 | for (const auto& key : keys) { | ||
| 209 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 210 | } | ||
| 211 | for (const auto& key : bound_samplers) { | ||
| 212 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 213 | } | ||
| 214 | for (const auto& key : bindless_samplers) { | ||
| 215 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 216 | } | ||
| 217 | |||
| 218 | usages.push_back(std::move(usage)); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | default: | ||
| 222 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", | ||
| 223 | static_cast<u32>(kind)); | ||
| 224 | return {}; | 216 | return {}; |
| 225 | } | 217 | } |
| 226 | } | 218 | } |
| 227 | 219 | ||
| 228 | is_usable = true; | 220 | is_usable = true; |
| 229 | return {{std::move(raws), std::move(usages)}}; | 221 | return {std::move(entries)}; |
| 230 | } | 222 | } |
| 231 | 223 | ||
| 232 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> | 224 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 233 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 234 | if (!is_usable) { | 225 | if (!is_usable) { |
| 235 | return {}; | 226 | return {}; |
| 236 | } | 227 | } |
| 237 | 228 | ||
| 238 | std::string path = GetPrecompiledPath(); | 229 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); |
| 239 | FileUtil::IOFile file(path, "rb"); | ||
| 240 | if (!file.IsOpen()) { | 230 | if (!file.IsOpen()) { |
| 241 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 231 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); |
| 242 | GetTitleID()); | ||
| 243 | return {}; | 232 | return {}; |
| 244 | } | 233 | } |
| 245 | 234 | ||
| 246 | const auto result = LoadPrecompiledFile(file); | 235 | if (const auto result = LoadPrecompiledFile(file)) { |
| 247 | if (!result) { | 236 | return *result; |
| 248 | LOG_INFO(Render_OpenGL, | ||
| 249 | "Failed to load precompiled cache for game with title id={}, removing", | ||
| 250 | GetTitleID()); | ||
| 251 | file.Close(); | ||
| 252 | InvalidatePrecompiled(); | ||
| 253 | return {}; | ||
| 254 | } | 237 | } |
| 255 | return *result; | 238 | |
| 239 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 240 | file.Close(); | ||
| 241 | InvalidatePrecompiled(); | ||
| 242 | return {}; | ||
| 256 | } | 243 | } |
| 257 | 244 | ||
| 258 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 245 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( |
| 259 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 246 | FileUtil::IOFile& file) { |
| 260 | // Read compressed file from disk and decompress to virtual precompiled cache file | 247 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 261 | std::vector<u8> compressed(file.GetSize()); | 248 | std::vector<u8> compressed(file.GetSize()); |
| 262 | file.ReadBytes(compressed.data(), compressed.size()); | 249 | file.ReadBytes(compressed.data(), compressed.size()); |
| @@ -275,58 +262,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 275 | return {}; | 262 | return {}; |
| 276 | } | 263 | } |
| 277 | 264 | ||
| 278 | ShaderDumpsMap dumps; | 265 | std::vector<ShaderDiskCachePrecompiled> entries; |
| 279 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 266 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 280 | u32 num_keys{}; | 267 | u32 binary_size; |
| 281 | u32 num_bound_samplers{}; | 268 | auto& entry = entries.emplace_back(); |
| 282 | u32 num_bindless_samplers{}; | 269 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || |
| 283 | ShaderDiskCacheUsage usage; | 270 | !LoadObjectFromPrecompiled(entry.binary_format) || |
| 284 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | 271 | !LoadObjectFromPrecompiled(binary_size)) { |
| 285 | !LoadObjectFromPrecompiled(usage.variant) || | ||
| 286 | !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||
| 287 | !LoadObjectFromPrecompiled(num_keys) || | ||
| 288 | !LoadObjectFromPrecompiled(num_bound_samplers) || | ||
| 289 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 293 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 294 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 295 | if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || | ||
| 296 | !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != | ||
| 297 | bound_samplers.size() || | ||
| 298 | !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 299 | bindless_samplers.size()) { | ||
| 300 | return {}; | ||
| 301 | } | ||
| 302 | for (const auto& key : keys) { | ||
| 303 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 304 | } | ||
| 305 | for (const auto& key : bound_samplers) { | ||
| 306 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 307 | } | ||
| 308 | for (const auto& key : bindless_samplers) { | ||
| 309 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 310 | } | ||
| 311 | |||
| 312 | ShaderDiskCacheDump dump; | ||
| 313 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 314 | return {}; | ||
| 315 | } | ||
| 316 | |||
| 317 | u32 binary_length{}; | ||
| 318 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 319 | return {}; | 272 | return {}; |
| 320 | } | 273 | } |
| 321 | 274 | ||
| 322 | dump.binary.resize(binary_length); | 275 | entry.binary.resize(binary_size); |
| 323 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | 276 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { |
| 324 | return {}; | 277 | return {}; |
| 325 | } | 278 | } |
| 326 | |||
| 327 | dumps.emplace(std::move(usage), dump); | ||
| 328 | } | 279 | } |
| 329 | return dumps; | 280 | return entries; |
| 330 | } | 281 | } |
| 331 | 282 | ||
| 332 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 283 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
| @@ -346,13 +297,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | |||
| 346 | } | 297 | } |
| 347 | } | 298 | } |
| 348 | 299 | ||
| 349 | void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | 300 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { |
| 350 | if (!is_usable) { | 301 | if (!is_usable) { |
| 351 | return; | 302 | return; |
| 352 | } | 303 | } |
| 353 | 304 | ||
| 354 | const u64 id = entry.GetUniqueIdentifier(); | 305 | const u64 id = entry.unique_identifier; |
| 355 | if (transferable.find(id) != transferable.end()) { | 306 | if (stored_transferable.find(id) != stored_transferable.end()) { |
| 356 | // The shader already exists | 307 | // The shader already exists |
| 357 | return; | 308 | return; |
| 358 | } | 309 | } |
| @@ -361,71 +312,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||
| 361 | if (!file.IsOpen()) { | 312 | if (!file.IsOpen()) { |
| 362 | return; | 313 | return; |
| 363 | } | 314 | } |
| 364 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | 315 | if (!entry.Save(file)) { |
| 365 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | 316 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |
| 366 | file.Close(); | 317 | file.Close(); |
| 367 | InvalidateTransferable(); | 318 | InvalidateTransferable(); |
| 368 | return; | 319 | return; |
| 369 | } | 320 | } |
| 370 | transferable.insert({id, {}}); | ||
| 371 | } | ||
| 372 | 321 | ||
| 373 | void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | 322 | stored_transferable.insert(id); |
| 374 | if (!is_usable) { | ||
| 375 | return; | ||
| 376 | } | ||
| 377 | |||
| 378 | const auto it = transferable.find(usage.unique_identifier); | ||
| 379 | ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); | ||
| 380 | |||
| 381 | auto& usages{it->second}; | ||
| 382 | if (usages.find(usage) != usages.end()) { | ||
| 383 | // Skip this variant since the shader is already stored. | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | usages.insert(usage); | ||
| 387 | |||
| 388 | FileUtil::IOFile file = AppendTransferableFile(); | ||
| 389 | if (!file.IsOpen()) | ||
| 390 | return; | ||
| 391 | const auto Close = [&] { | ||
| 392 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); | ||
| 393 | file.Close(); | ||
| 394 | InvalidateTransferable(); | ||
| 395 | }; | ||
| 396 | |||
| 397 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||
| 398 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||
| 399 | file.WriteObject(usage.bound_buffer) != 1 || | ||
| 400 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||
| 401 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||
| 402 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||
| 403 | Close(); | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | for (const auto& [pair, value] : usage.keys) { | ||
| 407 | const auto [cbuf, offset] = pair; | ||
| 408 | if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 409 | Close(); | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 414 | if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 415 | Close(); | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 420 | const auto [cbuf, offset] = pair; | ||
| 421 | if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 422 | Close(); | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | } | ||
| 426 | } | 323 | } |
| 427 | 324 | ||
| 428 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { | 325 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { |
| 429 | if (!is_usable) { | 326 | if (!is_usable) { |
| 430 | return; | 327 | return; |
| 431 | } | 328 | } |
| @@ -437,51 +334,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 437 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 334 | SavePrecompiledHeaderToVirtualPrecompiledCache(); |
| 438 | } | 335 | } |
| 439 | 336 | ||
| 440 | GLint binary_length{}; | 337 | GLint binary_length; |
| 441 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 338 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 442 | 339 | ||
| 443 | GLenum binary_format{}; | 340 | GLenum binary_format; |
| 444 | std::vector<u8> binary(binary_length); | 341 | std::vector<u8> binary(binary_length); |
| 445 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 342 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 446 | 343 | ||
| 447 | const auto Close = [&] { | 344 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || |
| 345 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 346 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 448 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | 347 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |
| 449 | usage.unique_identifier); | 348 | unique_identifier); |
| 450 | InvalidatePrecompiled(); | 349 | InvalidatePrecompiled(); |
| 451 | }; | ||
| 452 | |||
| 453 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||
| 454 | !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || | ||
| 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||
| 456 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||
| 457 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||
| 458 | Close(); | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | for (const auto& [pair, value] : usage.keys) { | ||
| 462 | const auto [cbuf, offset] = pair; | ||
| 463 | if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 464 | Close(); | ||
| 465 | return; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 469 | if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 470 | Close(); | ||
| 471 | return; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 475 | const auto [cbuf, offset] = pair; | ||
| 476 | if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 477 | Close(); | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | } | ||
| 481 | if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 482 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 483 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 484 | Close(); | ||
| 485 | } | 350 | } |
| 486 | } | 351 | } |
| 487 | 352 | ||
| @@ -534,7 +399,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | |||
| 534 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { | 399 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { |
| 535 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | 400 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", |
| 536 | precompiled_path); | 401 | precompiled_path); |
| 537 | return; | ||
| 538 | } | 402 | } |
| 539 | } | 403 | } |
| 540 | 404 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ef2371f6d..0ce0ea3f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 23 | #include "video_core/shader/const_buffer_locker.h" | 22 | #include "video_core/shader/const_buffer_locker.h" |
| 24 | 23 | ||
| 25 | namespace Core { | 24 | namespace Core { |
| @@ -32,139 +31,37 @@ class IOFile; | |||
| 32 | 31 | ||
| 33 | namespace OpenGL { | 32 | namespace OpenGL { |
| 34 | 33 | ||
| 35 | struct ShaderDiskCacheUsage; | ||
| 36 | struct ShaderDiskCacheDump; | ||
| 37 | |||
| 38 | using ProgramCode = std::vector<u64>; | 34 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 40 | |||
| 41 | /// Describes the different variants a program can be compiled with. | ||
| 42 | struct ProgramVariant final { | ||
| 43 | ProgramVariant() = default; | ||
| 44 | |||
| 45 | /// Graphics constructor. | ||
| 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept | ||
| 47 | : primitive_mode{primitive_mode} {} | ||
| 48 | |||
| 49 | /// Compute constructor. | ||
| 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | ||
| 51 | u32 local_memory_size) noexcept | ||
| 52 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, | ||
| 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | ||
| 54 | |||
| 55 | // Graphics specific parameters. | ||
| 56 | GLenum primitive_mode{}; | ||
| 57 | |||
| 58 | // Compute specific parameters. | ||
| 59 | u32 block_x{}; | ||
| 60 | u16 block_y{}; | ||
| 61 | u16 block_z{}; | ||
| 62 | u32 shared_memory_size{}; | ||
| 63 | u32 local_memory_size{}; | ||
| 64 | |||
| 65 | bool operator==(const ProgramVariant& rhs) const noexcept { | ||
| 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, | ||
| 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, | ||
| 68 | rhs.block_z, rhs.shared_memory_size, | ||
| 69 | rhs.local_memory_size); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { | ||
| 73 | return !operator==(rhs); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||
| 77 | |||
| 78 | /// Describes how a shader is used. | ||
| 79 | struct ShaderDiskCacheUsage { | ||
| 80 | u64 unique_identifier{}; | ||
| 81 | ProgramVariant variant; | ||
| 82 | u32 bound_buffer{}; | ||
| 83 | VideoCommon::Shader::KeyMap keys; | ||
| 84 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 85 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 86 | |||
| 87 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | ||
| 88 | return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == | ||
| 89 | std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||
| 90 | rhs.bindless_samplers); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | ||
| 94 | return !operator==(rhs); | ||
| 95 | } | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace OpenGL | ||
| 99 | |||
| 100 | namespace std { | ||
| 101 | |||
| 102 | template <> | ||
| 103 | struct hash<OpenGL::ProgramVariant> { | ||
| 104 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 105 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | ||
| 106 | static_cast<std::size_t>(variant.block_x) ^ | ||
| 107 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | ||
| 108 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | ||
| 109 | (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ | ||
| 110 | (static_cast<std::size_t>(variant.local_memory_size) << 36); | ||
| 111 | } | ||
| 112 | }; | ||
| 113 | |||
| 114 | template <> | ||
| 115 | struct hash<OpenGL::ShaderDiskCacheUsage> { | ||
| 116 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | ||
| 117 | return static_cast<std::size_t>(usage.unique_identifier) ^ | ||
| 118 | std::hash<OpenGL::ProgramVariant>{}(usage.variant); | ||
| 119 | } | ||
| 120 | }; | ||
| 121 | |||
| 122 | } // namespace std | ||
| 123 | |||
| 124 | namespace OpenGL { | ||
| 125 | 35 | ||
| 126 | /// Describes a shader how it's used by the guest GPU | 36 | /// Describes a shader and how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 37 | struct ShaderDiskCacheEntry { |
| 128 | public: | 38 | ShaderDiskCacheEntry(); |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, | 39 | ~ShaderDiskCacheEntry(); |
| 130 | ProgramCode code, ProgramCode code_b = {}); | ||
| 131 | ShaderDiskCacheRaw(); | ||
| 132 | ~ShaderDiskCacheRaw(); | ||
| 133 | 40 | ||
| 134 | bool Load(FileUtil::IOFile& file); | 41 | bool Load(FileUtil::IOFile& file); |
| 135 | 42 | ||
| 136 | bool Save(FileUtil::IOFile& file) const; | 43 | bool Save(FileUtil::IOFile& file) const; |
| 137 | 44 | ||
| 138 | u64 GetUniqueIdentifier() const { | ||
| 139 | return unique_identifier; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool HasProgramA() const { | 45 | bool HasProgramA() const { |
| 143 | return !code.empty() && !code_b.empty(); | 46 | return !code.empty() && !code_b.empty(); |
| 144 | } | 47 | } |
| 145 | 48 | ||
| 146 | Tegra::Engines::ShaderType GetType() const { | ||
| 147 | return type; | ||
| 148 | } | ||
| 149 | |||
| 150 | const ProgramCode& GetCode() const { | ||
| 151 | return code; | ||
| 152 | } | ||
| 153 | |||
| 154 | const ProgramCode& GetCodeB() const { | ||
| 155 | return code_b; | ||
| 156 | } | ||
| 157 | |||
| 158 | private: | ||
| 159 | u64 unique_identifier{}; | ||
| 160 | Tegra::Engines::ShaderType type{}; | 49 | Tegra::Engines::ShaderType type{}; |
| 161 | ProgramCode code; | 50 | ProgramCode code; |
| 162 | ProgramCode code_b; | 51 | ProgramCode code_b; |
| 52 | |||
| 53 | u64 unique_identifier = 0; | ||
| 54 | u32 bound_buffer = 0; | ||
| 55 | std::optional<u32> texture_handler_size; | ||
| 56 | VideoCommon::Shader::KeyMap keys; | ||
| 57 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 58 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 163 | }; | 59 | }; |
| 164 | 60 | ||
| 165 | /// Contains an OpenGL dumped binary program | 61 | /// Contains an OpenGL dumped binary program |
| 166 | struct ShaderDiskCacheDump { | 62 | struct ShaderDiskCachePrecompiled { |
| 167 | GLenum binary_format{}; | 63 | u64 unique_identifier = 0; |
| 64 | GLenum binary_format = 0; | ||
| 168 | std::vector<u8> binary; | 65 | std::vector<u8> binary; |
| 169 | }; | 66 | }; |
| 170 | 67 | ||
| @@ -174,11 +71,10 @@ public: | |||
| 174 | ~ShaderDiskCacheOpenGL(); | 71 | ~ShaderDiskCacheOpenGL(); |
| 175 | 72 | ||
| 176 | /// Loads transferable cache. If file has an old version or on failure, it deletes the file. | 73 | /// Loads transferable cache. If file has an old version or on failure, it deletes the file. |
| 177 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 74 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); |
| 178 | LoadTransferable(); | ||
| 179 | 75 | ||
| 180 | /// Loads current game's precompiled cache. Invalidates on failure. | 76 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 181 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); | 77 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); |
| 182 | 78 | ||
| 183 | /// Removes the transferable (and precompiled) cache file. | 79 | /// Removes the transferable (and precompiled) cache file. |
| 184 | void InvalidateTransferable(); | 80 | void InvalidateTransferable(); |
| @@ -187,21 +83,18 @@ public: | |||
| 187 | void InvalidatePrecompiled(); | 83 | void InvalidatePrecompiled(); |
| 188 | 84 | ||
| 189 | /// Saves a raw dump to the transferable file. Checks for collisions. | 85 | /// Saves a raw dump to the transferable file. Checks for collisions. |
| 190 | void SaveRaw(const ShaderDiskCacheRaw& entry); | 86 | void SaveEntry(const ShaderDiskCacheEntry& entry); |
| 191 | |||
| 192 | /// Saves shader usage to the transferable file. Does not check for collisions. | ||
| 193 | void SaveUsage(const ShaderDiskCacheUsage& usage); | ||
| 194 | 87 | ||
| 195 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 88 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 196 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 89 | void SavePrecompiled(u64 unique_identifier, GLuint program); |
| 197 | 90 | ||
| 198 | /// Serializes virtual precompiled shader cache file to real file | 91 | /// Serializes virtual precompiled shader cache file to real file |
| 199 | void SaveVirtualPrecompiledFile(); | 92 | void SaveVirtualPrecompiledFile(); |
| 200 | 93 | ||
| 201 | private: | 94 | private: |
| 202 | /// Loads the precompiled cache. Returns empty on failure. | 95 | /// Loads the precompiled cache. Returns empty on failure. |
| 203 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 96 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( |
| 204 | LoadPrecompiledFile(FileUtil::IOFile& file); | 97 | FileUtil::IOFile& file); |
| 205 | 98 | ||
| 206 | /// Opens current game's transferable file and writes its header if it doesn't exist | 99 | /// Opens current game's transferable file and writes its header if it doesn't exist |
| 207 | FileUtil::IOFile AppendTransferableFile() const; | 100 | FileUtil::IOFile AppendTransferableFile() const; |
| @@ -270,7 +163,7 @@ private: | |||
| 270 | std::size_t precompiled_cache_virtual_file_offset = 0; | 163 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 271 | 164 | ||
| 272 | // Stored transferable shaders | 165 | // Stored transferable shaders |
| 273 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 166 | std::unordered_set<u64> stored_transferable; |
| 274 | 167 | ||
| 275 | // The cache has been loaded at boot | 168 | // The cache has been loaded at boot |
| 276 | bool is_usable{}; | 169 | bool is_usable{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 34946fb47..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null | |||
| @@ -1,109 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace OpenGL::GLShader { | ||
| 17 | |||
| 18 | using Tegra::Engines::Maxwell3D; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | using VideoCommon::Shader::CompileDepth; | ||
| 21 | using VideoCommon::Shader::CompilerSettings; | ||
| 22 | using VideoCommon::Shader::ProgramCode; | ||
| 23 | using VideoCommon::Shader::ShaderIR; | ||
| 24 | |||
| 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | ||
| 26 | std::string out = GetCommonDeclarations(); | ||
| 27 | out += fmt::format(R"( | ||
| 28 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 29 | float y_direction; | ||
| 30 | }}; | ||
| 31 | |||
| 32 | )", | ||
| 33 | EmulationUniformBlockBinding); | ||
| 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); | ||
| 35 | if (ir_b) { | ||
| 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); | ||
| 37 | } | ||
| 38 | |||
| 39 | out += R"( | ||
| 40 | void main() { | ||
| 41 | gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f); | ||
| 42 | execute_vertex(); | ||
| 43 | )"; | ||
| 44 | if (ir_b) { | ||
| 45 | out += " execute_vertex_b();"; | ||
| 46 | } | ||
| 47 | out += "}\n"; | ||
| 48 | return out; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | ||
| 52 | std::string out = GetCommonDeclarations(); | ||
| 53 | out += fmt::format(R"( | ||
| 54 | layout (std140, binding = {}) uniform gs_config {{ | ||
| 55 | float y_direction; | ||
| 56 | }}; | ||
| 57 | |||
| 58 | )", | ||
| 59 | EmulationUniformBlockBinding); | ||
| 60 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | ||
| 61 | |||
| 62 | out += R"( | ||
| 63 | void main() { | ||
| 64 | execute_geometry(); | ||
| 65 | } | ||
| 66 | )"; | ||
| 67 | return out; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | ||
| 71 | std::string out = GetCommonDeclarations(); | ||
| 72 | out += fmt::format(R"( | ||
| 73 | layout (location = 0) out vec4 FragColor0; | ||
| 74 | layout (location = 1) out vec4 FragColor1; | ||
| 75 | layout (location = 2) out vec4 FragColor2; | ||
| 76 | layout (location = 3) out vec4 FragColor3; | ||
| 77 | layout (location = 4) out vec4 FragColor4; | ||
| 78 | layout (location = 5) out vec4 FragColor5; | ||
| 79 | layout (location = 6) out vec4 FragColor6; | ||
| 80 | layout (location = 7) out vec4 FragColor7; | ||
| 81 | |||
| 82 | layout (std140, binding = {}) uniform fs_config {{ | ||
| 83 | float y_direction; | ||
| 84 | }}; | ||
| 85 | |||
| 86 | )", | ||
| 87 | EmulationUniformBlockBinding); | ||
| 88 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | ||
| 89 | |||
| 90 | out += R"( | ||
| 91 | void main() { | ||
| 92 | execute_fragment(); | ||
| 93 | } | ||
| 94 | )"; | ||
| 95 | return out; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | ||
| 99 | std::string out = GetCommonDeclarations(); | ||
| 100 | out += Decompile(device, ir, ShaderType::Compute, "compute"); | ||
| 101 | out += R"( | ||
| 102 | void main() { | ||
| 103 | execute_compute(); | ||
| 104 | } | ||
| 105 | )"; | ||
| 106 | return out; | ||
| 107 | } | ||
| 108 | |||
| 109 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index cba2be9f9..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null | |||
| @@ -1,34 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | class Device; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace OpenGL::GLShader { | ||
| 18 | |||
| 19 | using VideoCommon::Shader::ProgramCode; | ||
| 20 | using VideoCommon::Shader::ShaderIR; | ||
| 21 | |||
| 22 | /// Generates the GLSL vertex shader program source code for the given VS program | ||
| 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); | ||
| 24 | |||
| 25 | /// Generates the GLSL geometry shader program source code for the given GS program | ||
| 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); | ||
| 27 | |||
| 28 | /// Generates the GLSL fragment shader program source code for the given FS program | ||
| 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); | ||
| 30 | |||
| 31 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); | ||
| 33 | |||
| 34 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 0638be8cb..c859dd7ca 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -14,8 +14,9 @@ namespace VideoCommon::Shader { | |||
| 14 | 14 | ||
| 15 | using Tegra::Engines::SamplerDescriptor; | 15 | using Tegra::Engines::SamplerDescriptor; |
| 16 | 16 | ||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 18 | : stage{shader_stage} {} | 18 | VideoCore::GuestDriverProfile stored_guest_driver_profile) |
| 19 | : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} | ||
| 19 | 20 | ||
| 20 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | 21 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 21 | Tegra::Engines::ConstBufferEngineInterface& engine) | 22 | Tegra::Engines::ConstBufferEngineInterface& engine) |
| @@ -97,7 +98,7 @@ void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | |||
| 97 | 98 | ||
| 98 | bool ConstBufferLocker::IsConsistent() const { | 99 | bool ConstBufferLocker::IsConsistent() const { |
| 99 | if (!engine) { | 100 | if (!engine) { |
| 100 | return false; | 101 | return true; |
| 101 | } | 102 | } |
| 102 | return std::all_of(keys.begin(), keys.end(), | 103 | return std::all_of(keys.begin(), keys.end(), |
| 103 | [this](const auto& pair) { | 104 | [this](const auto& pair) { |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d3ea11087..7c6f7bbdd 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -26,7 +26,8 @@ using BindlessSamplerMap = | |||
| 26 | */ | 26 | */ |
| 27 | class ConstBufferLocker { | 27 | class ConstBufferLocker { |
| 28 | public: | 28 | public: |
| 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 30 | VideoCore::GuestDriverProfile stored_guest_driver_profile); | ||
| 30 | 31 | ||
| 31 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | 32 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, |
| 32 | Tegra::Engines::ConstBufferEngineInterface& engine); | 33 | Tegra::Engines::ConstBufferEngineInterface& engine); |
| @@ -83,15 +84,13 @@ public: | |||
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | /// Obtains access to the guest driver's profile. | 86 | /// Obtains access to the guest driver's profile. |
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | 87 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { |
| 87 | if (engine) { | 88 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; |
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | 89 | } |
| 92 | 90 | ||
| 93 | private: | 91 | private: |
| 94 | const Tegra::Engines::ShaderType stage; | 92 | const Tegra::Engines::ShaderType stage; |
| 93 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | 94 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
| 96 | KeyMap keys; | 95 | KeyMap keys; |
| 97 | BoundSamplerMap bound_samplers; | 96 | BoundSamplerMap bound_samplers; |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..af4490d66 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 34 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 38 | const std::list<Sampler>& used_samplers) { | 38 | const std::list<Sampler>& used_samplers) { |
| 39 | if (gpu_driver == nullptr) { | 39 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | 40 | return; |
| 45 | } | 41 | } |
| 46 | u32 count{}; | 42 | u32 count{}; |
| @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | |||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | 49 | bound_offsets.emplace_back(sampler.GetOffset()); |
| 54 | } | 50 | } |
| 55 | if (count > 1) { | 51 | if (count > 1) { |
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | 52 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |
| 57 | } | 53 | } |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 56 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, |
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | 57 | VideoCore::GuestDriverProfile& gpu_driver, |
| 62 | const std::list<Sampler>& used_samplers) { | 58 | const std::list<Sampler>& used_samplers) { |
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | 59 | const u32 base_offset = sampler_to_deduce.GetOffset(); |
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | 60 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 69 | for (const auto& sampler : used_samplers) { | 61 | for (const auto& sampler : used_samplers) { |
| @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | |||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | 69 | if (max_offset == std::numeric_limits<u32>::max()) { |
| 78 | return std::nullopt; | 70 | return std::nullopt; |
| 79 | } | 71 | } |
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | 72 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |
| 81 | } | 73 | } |
| 82 | 74 | ||
| 83 | } // Anonymous namespace | 75 | } // Anonymous namespace |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..b1a0aa00c 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -94,13 +94,10 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 94 | } | 94 | } |
| 95 | auto [gpr, base_offset] = *pair; | 95 | auto [gpr, base_offset] = *pair; |
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); |
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 97 | const auto& gpu_driver = locker.AccessGuestDriverProfile(); |
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | 98 | const u32 bindless_cv = NewCustomVariable(); |
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | 99 | const Node op = |
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | 100 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); |
| 104 | 101 | ||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | 102 | const Node cv_node = GetCustomVariable(bindless_cv); |
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | 103 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); |