diff options
| author | 2019-07-14 22:25:13 -0300 | |
|---|---|---|
| committer | 2019-07-15 17:38:25 -0300 | |
| commit | 725ba6cf6336fb0f1be4e5250c15767d60e28259 (patch) | |
| tree | ed9c302a2b7735f64b09434469f4e9714122e789 | |
| parent | Merge pull request #2695 from ReinUsesLisp/layer-viewport (diff) | |
| download | yuzu-725ba6cf6336fb0f1be4e5250c15767d60e28259.tar.gz yuzu-725ba6cf6336fb0f1be4e5250c15767d60e28259.tar.xz yuzu-725ba6cf6336fb0f1be4e5250c15767d60e28259.zip | |
gl_rasterizer: Implement compute shaders
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 97 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 157 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 60 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 38 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 20 |
15 files changed, 350 insertions, 136 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 7404a8163..089465a71 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 52 | void KeplerCompute::ProcessLaunch() { |
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 53 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 54 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 55 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 56 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 57 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 58 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 59 | |||
| 60 | rasterizer.DispatchCompute(code_addr); | ||
| 60 | } | 61 | } |
| 61 | 62 | ||
| 62 | } // namespace Tegra::Engines | 63 | } // namespace Tegra::Engines |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1b4975498..e25754e37 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 50 | return *maxwell_3d; | 50 | return *maxwell_3d; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 54 | return *kepler_compute; | ||
| 55 | } | ||
| 56 | |||
| 57 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 58 | return *kepler_compute; | ||
| 59 | } | ||
| 60 | |||
| 53 | MemoryManager& GPU::MemoryManager() { | 61 | MemoryManager& GPU::MemoryManager() { |
| 54 | return *memory_manager; | 62 | return *memory_manager; |
| 55 | } | 63 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..0ace0ff4f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,6 +155,12 @@ public: | |||
| 155 | /// Returns a const reference to the Maxwell3D GPU engine. | 155 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 156 | const Engines::Maxwell3D& Maxwell3D() const; | 156 | const Engines::Maxwell3D& Maxwell3D() const; |
| 157 | 157 | ||
| 158 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 159 | Engines::KeplerCompute& KeplerCompute(); | ||
| 160 | |||
| 161 | /// Returns a const reference to the KeplerCompute GPU engine. | ||
| 162 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 163 | |||
| 158 | /// Returns a reference to the GPU memory manager. | 164 | /// Returns a reference to the GPU memory manager. |
| 159 | Tegra::MemoryManager& MemoryManager(); | 165 | Tegra::MemoryManager& MemoryManager(); |
| 160 | 166 | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2b7367568..9881df0d5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -34,6 +34,9 @@ public: | |||
| 34 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 36 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 37 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 38 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 39 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0bb5c068c..f4728f50c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,6 +20,7 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -298,9 +300,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 298 | 300 | ||
| 299 | Shader shader{shader_cache.GetStageProgram(program)}; | 301 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 300 | 302 | ||
| 301 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | 303 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 302 | SetupDrawConstBuffers(stage_enum, shader); | 304 | SetupDrawConstBuffers(stage_enum, shader); |
| 303 | SetupGlobalRegions(stage_enum, shader); | 305 | SetupDrawGlobalMemory(stage_enum, shader); |
| 304 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | 306 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; |
| 305 | 307 | ||
| 306 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | 308 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; |
| @@ -702,6 +704,43 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 702 | accelerate_draw = AccelDraw::Disabled; | 704 | accelerate_draw = AccelDraw::Disabled; |
| 703 | } | 705 | } |
| 704 | 706 | ||
| 707 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 708 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 709 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 710 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 711 | return; | ||
| 712 | } | ||
| 713 | |||
| 714 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 715 | const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||
| 716 | state.draw.shader_program = program; | ||
| 717 | state.draw.program_pipeline = 0; | ||
| 718 | |||
| 719 | const std::size_t buffer_size = | ||
| 720 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 721 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 722 | buffer_cache.Map(buffer_size); | ||
| 723 | |||
| 724 | bind_ubo_pushbuffer.Setup(0); | ||
| 725 | bind_ssbo_pushbuffer.Setup(0); | ||
| 726 | |||
| 727 | SetupComputeConstBuffers(kernel); | ||
| 728 | SetupComputeGlobalMemory(kernel); | ||
| 729 | |||
| 730 | buffer_cache.Unmap(); | ||
| 731 | |||
| 732 | bind_ubo_pushbuffer.Bind(); | ||
| 733 | bind_ssbo_pushbuffer.Bind(); | ||
| 734 | |||
| 735 | state.ApplyShaderProgram(); | ||
| 736 | state.ApplyProgramPipeline(); | ||
| 737 | |||
| 738 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 739 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 740 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 741 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 742 | } | ||
| 743 | |||
| 705 | void RasterizerOpenGL::FlushAll() {} | 744 | void RasterizerOpenGL::FlushAll() {} |
| 706 | 745 | ||
| 707 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | 746 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -775,12 +814,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 775 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 814 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 776 | const Shader& shader) { | 815 | const Shader& shader) { |
| 777 | MICROPROFILE_SCOPE(OpenGL_UBO); | 816 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 778 | const auto stage_index = static_cast<std::size_t>(stage); | 817 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 779 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 818 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 780 | |||
| 781 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | ||
| 782 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 819 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 783 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 820 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 821 | SetupConstBuffer(buffer, entry); | ||
| 822 | } | ||
| 823 | } | ||
| 824 | |||
| 825 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | ||
| 826 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 827 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 828 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | ||
| 829 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 830 | const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||
| 831 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 832 | buffer.address = config.Address(); | ||
| 833 | buffer.size = config.size; | ||
| 834 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 835 | SetupConstBuffer(buffer, entry); | ||
| 784 | } | 836 | } |
| 785 | } | 837 | } |
| 786 | 838 | ||
| @@ -801,24 +853,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 801 | bind_ubo_pushbuffer.Push(cbuf, offset, size); | 853 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 802 | } | 854 | } |
| 803 | 855 | ||
| 804 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 856 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 805 | const Shader& shader) { | 857 | const Shader& shader) { |
| 806 | auto& gpu{system.GPU()}; | 858 | auto& gpu{system.GPU()}; |
| 807 | auto& memory_manager{gpu.MemoryManager()}; | 859 | auto& memory_manager{gpu.MemoryManager()}; |
| 808 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | 860 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 809 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 810 | |||
| 811 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 861 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 812 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 862 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 813 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | 863 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 814 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 864 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 865 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 866 | } | ||
| 867 | } | ||
| 815 | 868 | ||
| 816 | const auto [ssbo, buffer_offset] = | 869 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { |
| 817 | buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | 870 | auto& gpu{system.GPU()}; |
| 818 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | 871 | auto& memory_manager{gpu.MemoryManager()}; |
| 872 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 873 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 874 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 875 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 876 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 877 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 819 | } | 878 | } |
| 820 | } | 879 | } |
| 821 | 880 | ||
| 881 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||
| 882 | GPUVAddr gpu_addr, std::size_t size) { | ||
| 883 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 884 | const auto [ssbo, buffer_offset] = | ||
| 885 | buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | ||
| 886 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 887 | } | ||
| 888 | |||
| 822 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 889 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 823 | BaseBindings base_bindings) { | 890 | BaseBindings base_bindings) { |
| 824 | MICROPROFILE_SCOPE(OpenGL_Texture); | 891 | MICROPROFILE_SCOPE(OpenGL_Texture); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 40b571d58..b2b671230 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | 58 | ||
| 59 | void DrawArrays() override; | 59 | void DrawArrays() override; |
| 60 | void Clear() override; | 60 | void Clear() override; |
| 61 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 61 | void FlushAll() override; | 62 | void FlushAll() override; |
| 62 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 63 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| @@ -112,13 +113,23 @@ private: | |||
| 112 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 113 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 113 | const Shader& shader); | 114 | const Shader& shader); |
| 114 | 115 | ||
| 116 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 117 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 118 | |||
| 115 | /// Configures a constant buffer. | 119 | /// Configures a constant buffer. |
| 116 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 120 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 117 | const GLShader::ConstBufferEntry& entry); | 121 | const GLShader::ConstBufferEntry& entry); |
| 118 | 122 | ||
| 119 | /// Configures the current global memory entries to use for the draw command. | 123 | /// Configures the current global memory entries to use for the draw command. |
| 120 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 124 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 121 | const Shader& shader); | 125 | const Shader& shader); |
| 126 | |||
| 127 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 128 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 129 | |||
| 130 | /// Configures a global memory entry. | ||
| 131 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 132 | std::size_t size); | ||
| 122 | 133 | ||
| 123 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 134 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 124 | /// usage. | 135 | /// usage. |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 32dd9eae7..7e95c2daa 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,8 +102,29 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | constexpr ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | } | ||
| 122 | |||
| 103 | /// Calculates the size of a program stream | 123 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 124 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 125 | if (program.empty()) { | ||
| 126 | return 0; | ||
| 127 | } | ||
| 105 | constexpr std::size_t start_offset = 10; | 128 | constexpr std::size_t start_offset = 10; |
| 106 | // This is the encoded version of BRA that jumps to itself. All Nvidia | 129 | // This is the encoded version of BRA that jumps to itself. All Nvidia |
| 107 | // shaders end with one. | 130 | // shaders end with one. |
| @@ -128,13 +151,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 128 | } | 151 | } |
| 129 | 152 | ||
| 130 | /// Hashes one (or two) program streams | 153 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 154 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | 155 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | if (size_a == 0) { | 156 | if (size_a == 0) { |
| 134 | size_a = CalculateProgramSize(code); | 157 | size_a = CalculateProgramSize(code); |
| 135 | } | 158 | } |
| 136 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | 159 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); |
| 137 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 160 | if (program_type != ProgramType::VertexA) { |
| 138 | return unique_identifier; | 161 | return unique_identifier; |
| 139 | } | 162 | } |
| 140 | // VertexA programs include two programs | 163 | // VertexA programs include two programs |
| @@ -152,12 +175,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 152 | } | 175 | } |
| 153 | 176 | ||
| 154 | /// Creates an unspecialized program from code streams | 177 | /// Creates an unspecialized program from code streams |
| 155 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 178 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 156 | ProgramCode program_code, ProgramCode program_code_b) { | 179 | ProgramCode program_code, ProgramCode program_code_b) { |
| 157 | GLShader::ShaderSetup setup(program_code); | 180 | GLShader::ShaderSetup setup(program_code); |
| 158 | setup.program.size_a = CalculateProgramSize(program_code); | 181 | setup.program.size_a = CalculateProgramSize(program_code); |
| 159 | setup.program.size_b = 0; | 182 | setup.program.size_b = 0; |
| 160 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 183 | if (program_type == ProgramType::VertexA) { |
| 161 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 184 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 162 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 185 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 163 | // stage here. | 186 | // stage here. |
| @@ -168,22 +191,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr | |||
| 168 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | 191 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 169 | 192 | ||
| 170 | switch (program_type) { | 193 | switch (program_type) { |
| 171 | case Maxwell::ShaderProgram::VertexA: | 194 | case ProgramType::VertexA: |
| 172 | case Maxwell::ShaderProgram::VertexB: | 195 | case ProgramType::VertexB: |
| 173 | return GLShader::GenerateVertexShader(device, setup); | 196 | return GLShader::GenerateVertexShader(device, setup); |
| 174 | case Maxwell::ShaderProgram::Geometry: | 197 | case ProgramType::Geometry: |
| 175 | return GLShader::GenerateGeometryShader(device, setup); | 198 | return GLShader::GenerateGeometryShader(device, setup); |
| 176 | case Maxwell::ShaderProgram::Fragment: | 199 | case ProgramType::Fragment: |
| 177 | return GLShader::GenerateFragmentShader(device, setup); | 200 | return GLShader::GenerateFragmentShader(device, setup); |
| 201 | case ProgramType::Compute: | ||
| 202 | return GLShader::GenerateComputeShader(device, setup); | ||
| 178 | default: | 203 | default: |
| 179 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 204 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 180 | UNREACHABLE(); | ||
| 181 | return {}; | 205 | return {}; |
| 182 | } | 206 | } |
| 183 | } | 207 | } |
| 184 | 208 | ||
| 185 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 209 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 186 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | 210 | ProgramType program_type, const ProgramVariant& variant, |
| 187 | bool hint_retrievable = false) { | 211 | bool hint_retrievable = false) { |
| 188 | auto base_bindings{variant.base_bindings}; | 212 | auto base_bindings{variant.base_bindings}; |
| 189 | const auto primitive_mode{variant.primitive_mode}; | 213 | const auto primitive_mode{variant.primitive_mode}; |
| @@ -194,7 +218,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 194 | if (entries.shader_viewport_layer_array) { | 218 | if (entries.shader_viewport_layer_array) { |
| 195 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 219 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; |
| 196 | } | 220 | } |
| 197 | source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 221 | if (program_type == ProgramType::Compute) { |
| 222 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 223 | } | ||
| 224 | source += '\n'; | ||
| 225 | |||
| 226 | if (program_type != ProgramType::Compute) { | ||
| 227 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 228 | } | ||
| 198 | 229 | ||
| 199 | for (const auto& cbuf : entries.const_buffers) { | 230 | for (const auto& cbuf : entries.const_buffers) { |
| 200 | source += | 231 | source += |
| @@ -221,13 +252,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 221 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 252 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); |
| 222 | } | 253 | } |
| 223 | 254 | ||
| 224 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 255 | if (program_type == ProgramType::Geometry) { |
| 225 | const auto [glsl_topology, debug_name, max_vertices] = | 256 | const auto [glsl_topology, debug_name, max_vertices] = |
| 226 | GetPrimitiveDescription(primitive_mode); | 257 | GetPrimitiveDescription(primitive_mode); |
| 227 | 258 | ||
| 228 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 259 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; |
| 229 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 260 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 230 | } | 261 | } |
| 262 | if (program_type == ProgramType::Compute) { | ||
| 263 | source += "layout (local_size_variable) in;\n"; | ||
| 264 | } | ||
| 231 | 265 | ||
| 232 | source += code; | 266 | source += code; |
| 233 | 267 | ||
| @@ -255,7 +289,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 255 | 289 | ||
| 256 | } // Anonymous namespace | 290 | } // Anonymous namespace |
| 257 | 291 | ||
| 258 | CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 292 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 259 | GLShader::ProgramResult result) | 293 | GLShader::ProgramResult result) |
| 260 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 294 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 261 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 295 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| @@ -264,33 +298,55 @@ CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgra | |||
| 264 | 298 | ||
| 265 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 299 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 266 | Maxwell::ShaderProgram program_type, | 300 | Maxwell::ShaderProgram program_type, |
| 267 | ProgramCode&& program_code, | 301 | const ProgramCode& program_code, |
| 268 | ProgramCode&& program_code_b) { | 302 | const ProgramCode& program_code_b) { |
| 269 | const auto code_size{CalculateProgramSize(program_code)}; | 303 | const auto code_size{CalculateProgramSize(program_code)}; |
| 270 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | 304 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 271 | auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | 305 | auto result{ |
| 306 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 272 | if (result.first.empty()) { | 307 | if (result.first.empty()) { |
| 273 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 308 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 274 | return {}; | 309 | return {}; |
| 275 | } | 310 | } |
| 276 | 311 | ||
| 277 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 312 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 278 | params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | 313 | params.unique_identifier, GetProgramType(program_type), |
| 279 | static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | 314 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 280 | std::move(program_code_b))); | 315 | std::move(program_code), std::move(program_code_b))); |
| 281 | 316 | ||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 317 | return std::shared_ptr<CachedShader>( |
| 318 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 283 | } | 319 | } |
| 284 | 320 | ||
| 285 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 321 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 286 | Maxwell::ShaderProgram program_type, | 322 | Maxwell::ShaderProgram program_type, |
| 287 | GLShader::ProgramResult result) { | 323 | GLShader::ProgramResult result) { |
| 288 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 324 | return std::shared_ptr<CachedShader>( |
| 325 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 326 | } | ||
| 327 | |||
| 328 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, | ||
| 329 | const ProgramCode& code) { | ||
| 330 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 331 | |||
| 332 | const auto code_size{CalculateProgramSize(code)}; | ||
| 333 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 334 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 335 | std::move(code), {})); | ||
| 336 | |||
| 337 | return std::shared_ptr<CachedShader>( | ||
| 338 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 339 | } | ||
| 340 | |||
| 341 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||
| 342 | GLShader::ProgramResult result) { | ||
| 343 | return std::shared_ptr<CachedShader>( | ||
| 344 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 289 | } | 345 | } |
| 290 | 346 | ||
| 291 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 347 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 292 | GLuint handle{}; | 348 | GLuint handle{}; |
| 293 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 349 | if (program_type == ProgramType::Geometry) { |
| 294 | handle = GetGeometryShader(variant); | 350 | handle = GetGeometryShader(variant); |
| 295 | } else { | 351 | } else { |
| 296 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 352 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| @@ -308,8 +364,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 308 | handle = program->handle; | 364 | handle = program->handle; |
| 309 | } | 365 | } |
| 310 | 366 | ||
| 311 | auto base_bindings{variant.base_bindings}; | 367 | auto base_bindings = variant.base_bindings; |
| 312 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 368 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 369 | if (program_type != ProgramType::Compute) { | ||
| 370 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 371 | } | ||
| 313 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 372 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 314 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 373 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 315 | 374 | ||
| @@ -589,13 +648,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 589 | // No shader found - create a new one | 648 | // No shader found - create a new one |
| 590 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 649 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 591 | ProgramCode program_code_b; | 650 | ProgramCode program_code_b; |
| 592 | if (program == Maxwell::ShaderProgram::VertexA) { | 651 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 652 | if (is_program_a) { | ||
| 593 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 653 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 594 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 654 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 595 | memory_manager.GetPointer(program_addr_b)); | 655 | memory_manager.GetPointer(program_addr_b)); |
| 596 | } | 656 | } |
| 597 | 657 | ||
| 598 | const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 658 | const auto unique_identifier = |
| 659 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||
| 599 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 660 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| 600 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 661 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |
| 601 | host_ptr, unique_identifier}; | 662 | host_ptr, unique_identifier}; |
| @@ -612,4 +673,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 612 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 673 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 613 | } | 674 | } |
| 614 | 675 | ||
| 676 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 677 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 678 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 679 | auto kernel = TryGet(host_ptr); | ||
| 680 | if (kernel) { | ||
| 681 | return kernel; | ||
| 682 | } | ||
| 683 | |||
| 684 | // No kernel found - create a new one | ||
| 685 | const auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 686 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 687 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 688 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 689 | host_ptr, unique_identifier}; | ||
| 690 | |||
| 691 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 692 | if (found == precompiled_shaders.end()) { | ||
| 693 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 694 | } else { | ||
| 695 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 696 | } | ||
| 697 | |||
| 698 | Register(kernel); | ||
| 699 | return kernel; | ||
| 700 | } | ||
| 701 | |||
| 615 | } // namespace OpenGL | 702 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..7d84ffbab 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -55,12 +55,18 @@ class CachedShader final : public RasterizerCacheObject { | |||
| 55 | public: | 55 | public: |
| 56 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 56 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 57 | Maxwell::ShaderProgram program_type, | 57 | Maxwell::ShaderProgram program_type, |
| 58 | ProgramCode&& program_code, ProgramCode&& program_code_b); | 58 | const ProgramCode& program_code, |
| 59 | const ProgramCode& program_code_b); | ||
| 59 | 60 | ||
| 60 | static Shader CreateStageFromCache(const ShaderParameters& params, | 61 | static Shader CreateStageFromCache(const ShaderParameters& params, |
| 61 | Maxwell::ShaderProgram program_type, | 62 | Maxwell::ShaderProgram program_type, |
| 62 | GLShader::ProgramResult result); | 63 | GLShader::ProgramResult result); |
| 63 | 64 | ||
| 65 | static Shader CreateKernelFromMemory(const ShaderParameters& params, const ProgramCode& code); | ||
| 66 | |||
| 67 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 68 | GLShader::ProgramResult result); | ||
| 69 | |||
| 64 | VAddr GetCpuAddr() const override { | 70 | VAddr GetCpuAddr() const override { |
| 65 | return cpu_addr; | 71 | return cpu_addr; |
| 66 | } | 72 | } |
| @@ -78,7 +84,7 @@ public: | |||
| 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 84 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 79 | 85 | ||
| 80 | private: | 86 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 87 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 82 | GLShader::ProgramResult result); | 88 | GLShader::ProgramResult result); |
| 83 | 89 | ||
| 84 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 90 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| @@ -104,7 +110,7 @@ private: | |||
| 104 | u8* host_ptr{}; | 110 | u8* host_ptr{}; |
| 105 | VAddr cpu_addr{}; | 111 | VAddr cpu_addr{}; |
| 106 | u64 unique_identifier{}; | 112 | u64 unique_identifier{}; |
| 107 | Maxwell::ShaderProgram program_type{}; | 113 | ProgramType program_type{}; |
| 108 | ShaderDiskCacheOpenGL& disk_cache; | 114 | ShaderDiskCacheOpenGL& disk_cache; |
| 109 | const PrecompiledPrograms& precompiled_programs; | 115 | const PrecompiledPrograms& precompiled_programs; |
| 110 | 116 | ||
| @@ -132,6 +138,9 @@ public: | |||
| 132 | /// Gets the current specified shader stage program | 138 | /// Gets the current specified shader stage program |
| 133 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 139 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 134 | 140 | ||
| 141 | /// Gets a compute kernel in the passed address | ||
| 142 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 143 | |||
| 135 | protected: | 144 | protected: |
| 136 | // We do not have to flush this cache as things in it are never modified by us. | 145 | // We do not have to flush this cache as things in it are never modified by us. |
| 137 | void FlushObjectInner(const Shader& object) override {} | 146 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 119073776..6236c5cdd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -37,7 +37,6 @@ using namespace std::string_literals; | |||
| 37 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 38 | 38 | ||
| 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 40 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 41 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 42 | 41 | ||
| 43 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 162 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 163 | } | 162 | } |
| 164 | 163 | ||
| 164 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 165 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 166 | } | ||
| 167 | |||
| 165 | class GLSLDecompiler final { | 168 | class GLSLDecompiler final { |
| 166 | public: | 169 | public: |
| 167 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 170 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 168 | std::string suffix) | 171 | std::string suffix) |
| 169 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 172 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 170 | 173 | ||
| @@ -248,7 +251,7 @@ public: | |||
| 248 | } | 251 | } |
| 249 | entries.clip_distances = ir.GetClipDistances(); | 252 | entries.clip_distances = ir.GetClipDistances(); |
| 250 | entries.shader_viewport_layer_array = | 253 | entries.shader_viewport_layer_array = |
| 251 | stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); | 254 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); |
| 252 | entries.shader_length = ir.GetLength(); | 255 | entries.shader_length = ir.GetLength(); |
| 253 | return entries; | 256 | return entries; |
| 254 | } | 257 | } |
| @@ -259,14 +262,14 @@ private: | |||
| 259 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; | 262 | std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>; |
| 260 | 263 | ||
| 261 | void DeclareVertex() { | 264 | void DeclareVertex() { |
| 262 | if (stage != ShaderStage::Vertex) | 265 | if (!IsVertexShader(stage)) |
| 263 | return; | 266 | return; |
| 264 | 267 | ||
| 265 | DeclareVertexRedeclarations(); | 268 | DeclareVertexRedeclarations(); |
| 266 | } | 269 | } |
| 267 | 270 | ||
| 268 | void DeclareGeometry() { | 271 | void DeclareGeometry() { |
| 269 | if (stage != ShaderStage::Geometry) { | 272 | if (stage != ProgramType::Geometry) { |
| 270 | return; | 273 | return; |
| 271 | } | 274 | } |
| 272 | 275 | ||
| @@ -297,14 +300,14 @@ private: | |||
| 297 | break; | 300 | break; |
| 298 | } | 301 | } |
| 299 | } | 302 | } |
| 300 | if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { | 303 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { |
| 301 | if (ir.UsesLayer()) { | 304 | if (ir.UsesLayer()) { |
| 302 | code.AddLine("int gl_Layer;"); | 305 | code.AddLine("int gl_Layer;"); |
| 303 | } | 306 | } |
| 304 | if (ir.UsesViewportIndex()) { | 307 | if (ir.UsesViewportIndex()) { |
| 305 | code.AddLine("int gl_ViewportIndex;"); | 308 | code.AddLine("int gl_ViewportIndex;"); |
| 306 | } | 309 | } |
| 307 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && | 310 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && |
| 308 | !device.HasVertexViewportLayer()) { | 311 | !device.HasVertexViewportLayer()) { |
| 309 | LOG_ERROR( | 312 | LOG_ERROR( |
| 310 | Render_OpenGL, | 313 | Render_OpenGL, |
| @@ -341,6 +344,9 @@ private: | |||
| 341 | } | 344 | } |
| 342 | 345 | ||
| 343 | void DeclareLocalMemory() { | 346 | void DeclareLocalMemory() { |
| 347 | if (stage == ProgramType::Compute) { | ||
| 348 | return; | ||
| 349 | } | ||
| 344 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 350 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { |
| 345 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 351 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 346 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 352 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); |
| @@ -399,12 +405,12 @@ private: | |||
| 399 | const u32 location{GetGenericAttributeIndex(index)}; | 405 | const u32 location{GetGenericAttributeIndex(index)}; |
| 400 | 406 | ||
| 401 | std::string name{GetInputAttribute(index)}; | 407 | std::string name{GetInputAttribute(index)}; |
| 402 | if (stage == ShaderStage::Geometry) { | 408 | if (stage == ProgramType::Geometry) { |
| 403 | name = "gs_" + name + "[]"; | 409 | name = "gs_" + name + "[]"; |
| 404 | } | 410 | } |
| 405 | 411 | ||
| 406 | std::string suffix; | 412 | std::string suffix; |
| 407 | if (stage == ShaderStage::Fragment) { | 413 | if (stage == ProgramType::Fragment) { |
| 408 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 414 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 409 | if (skip_unused && input_mode == AttributeUse::Unused) { | 415 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 410 | return; | 416 | return; |
| @@ -416,7 +422,7 @@ private: | |||
| 416 | } | 422 | } |
| 417 | 423 | ||
| 418 | void DeclareOutputAttributes() { | 424 | void DeclareOutputAttributes() { |
| 419 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 425 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 420 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 426 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 421 | DeclareOutputAttribute(ToGenericAttribute(i)); | 427 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 422 | } | 428 | } |
| @@ -538,7 +544,7 @@ private: | |||
| 538 | constexpr u32 element_stride{4}; | 544 | constexpr u32 element_stride{4}; |
| 539 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 545 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 540 | 546 | ||
| 541 | const bool declared{stage != ShaderStage::Fragment || | 547 | const bool declared{stage != ProgramType::Fragment || |
| 542 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 548 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; |
| 543 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 549 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; |
| 544 | code.AddLine("case 0x{:x}: return {};", address, value); | 550 | code.AddLine("case 0x{:x}: return {};", address, value); |
| @@ -642,7 +648,7 @@ private: | |||
| 642 | } | 648 | } |
| 643 | 649 | ||
| 644 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 650 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 645 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 651 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 646 | "Physical attributes in geometry shaders are not implemented"); | 652 | "Physical attributes in geometry shaders are not implemented"); |
| 647 | if (abuf->IsPhysicalBuffer()) { | 653 | if (abuf->IsPhysicalBuffer()) { |
| 648 | return fmt::format("readPhysicalAttribute(ftou({}))", | 654 | return fmt::format("readPhysicalAttribute(ftou({}))", |
| @@ -697,6 +703,7 @@ private: | |||
| 697 | } | 703 | } |
| 698 | 704 | ||
| 699 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 705 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 706 | UNIMPLEMENTED_IF(stage == ProgramType::Compute); | ||
| 700 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 707 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 701 | } | 708 | } |
| 702 | 709 | ||
| @@ -726,7 +733,7 @@ private: | |||
| 726 | 733 | ||
| 727 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 734 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 728 | const auto GeometryPass = [&](std::string_view name) { | 735 | const auto GeometryPass = [&](std::string_view name) { |
| 729 | if (stage == ShaderStage::Geometry && buffer) { | 736 | if (stage == ProgramType::Geometry && buffer) { |
| 730 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 737 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 731 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 738 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 732 | // this happens and what's its intent. | 739 | // this happens and what's its intent. |
| @@ -738,10 +745,10 @@ private: | |||
| 738 | switch (attribute) { | 745 | switch (attribute) { |
| 739 | case Attribute::Index::Position: | 746 | case Attribute::Index::Position: |
| 740 | switch (stage) { | 747 | switch (stage) { |
| 741 | case ShaderStage::Geometry: | 748 | case ProgramType::Geometry: |
| 742 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 749 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), |
| 743 | GetSwizzle(element)); | 750 | GetSwizzle(element)); |
| 744 | case ShaderStage::Fragment: | 751 | case ProgramType::Fragment: |
| 745 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 752 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); |
| 746 | default: | 753 | default: |
| 747 | UNREACHABLE(); | 754 | UNREACHABLE(); |
| @@ -762,7 +769,7 @@ private: | |||
| 762 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 769 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 763 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 770 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 764 | // shader. | 771 | // shader. |
| 765 | ASSERT(stage == ShaderStage::Vertex); | 772 | ASSERT(IsVertexShader(stage)); |
| 766 | switch (element) { | 773 | switch (element) { |
| 767 | case 2: | 774 | case 2: |
| 768 | // Config pack's first value is instance_id. | 775 | // Config pack's first value is instance_id. |
| @@ -774,7 +781,7 @@ private: | |||
| 774 | return "0"; | 781 | return "0"; |
| 775 | case Attribute::Index::FrontFacing: | 782 | case Attribute::Index::FrontFacing: |
| 776 | // TODO(Subv): Find out what the values are for the other elements. | 783 | // TODO(Subv): Find out what the values are for the other elements. |
| 777 | ASSERT(stage == ShaderStage::Fragment); | 784 | ASSERT(stage == ProgramType::Fragment); |
| 778 | switch (element) { | 785 | switch (element) { |
| 779 | case 3: | 786 | case 3: |
| 780 | return "itof(gl_FrontFacing ? -1 : 0)"; | 787 | return "itof(gl_FrontFacing ? -1 : 0)"; |
| @@ -796,7 +803,7 @@ private: | |||
| 796 | return value; | 803 | return value; |
| 797 | } | 804 | } |
| 798 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 805 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 799 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 806 | const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; |
| 800 | 807 | ||
| 801 | const std::string temporary = code.GenerateTemporary(); | 808 | const std::string temporary = code.GenerateTemporary(); |
| 802 | code.AddLine("{}float {} = {};", precise, temporary, value); | 809 | code.AddLine("{}float {} = {};", precise, temporary, value); |
| @@ -831,12 +838,12 @@ private: | |||
| 831 | UNIMPLEMENTED(); | 838 | UNIMPLEMENTED(); |
| 832 | return {}; | 839 | return {}; |
| 833 | case 1: | 840 | case 1: |
| 834 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 841 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 835 | return {}; | 842 | return {}; |
| 836 | } | 843 | } |
| 837 | return std::make_pair("gl_Layer", true); | 844 | return std::make_pair("gl_Layer", true); |
| 838 | case 2: | 845 | case 2: |
| 839 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 846 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 840 | return {}; | 847 | return {}; |
| 841 | } | 848 | } |
| 842 | return std::make_pair("gl_ViewportIndex", true); | 849 | return std::make_pair("gl_ViewportIndex", true); |
| @@ -1073,6 +1080,7 @@ private: | |||
| 1073 | target = result->first; | 1080 | target = result->first; |
| 1074 | is_integer = result->second; | 1081 | is_integer = result->second; |
| 1075 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1082 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1083 | UNIMPLEMENTED_IF(stage == ProgramType::Compute); | ||
| 1076 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1084 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1077 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1085 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1078 | const std::string real = Visit(gmem->GetRealAddress()); | 1086 | const std::string real = Visit(gmem->GetRealAddress()); |
| @@ -1630,7 +1638,7 @@ private: | |||
| 1630 | } | 1638 | } |
| 1631 | 1639 | ||
| 1632 | std::string Exit(Operation operation) { | 1640 | std::string Exit(Operation operation) { |
| 1633 | if (stage != ShaderStage::Fragment) { | 1641 | if (stage != ProgramType::Fragment) { |
| 1634 | code.AddLine("return;"); | 1642 | code.AddLine("return;"); |
| 1635 | return {}; | 1643 | return {}; |
| 1636 | } | 1644 | } |
| @@ -1681,7 +1689,7 @@ private: | |||
| 1681 | } | 1689 | } |
| 1682 | 1690 | ||
| 1683 | std::string EmitVertex(Operation operation) { | 1691 | std::string EmitVertex(Operation operation) { |
| 1684 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1692 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1685 | "EmitVertex is expected to be used in a geometry shader."); | 1693 | "EmitVertex is expected to be used in a geometry shader."); |
| 1686 | 1694 | ||
| 1687 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1695 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1692,7 +1700,7 @@ private: | |||
| 1692 | } | 1700 | } |
| 1693 | 1701 | ||
| 1694 | std::string EndPrimitive(Operation operation) { | 1702 | std::string EndPrimitive(Operation operation) { |
| 1695 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1703 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1696 | "EndPrimitive is expected to be used in a geometry shader."); | 1704 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1697 | 1705 | ||
| 1698 | code.AddLine("EndPrimitive();"); | 1706 | code.AddLine("EndPrimitive();"); |
| @@ -1927,7 +1935,7 @@ private: | |||
| 1927 | } | 1935 | } |
| 1928 | 1936 | ||
| 1929 | u32 GetNumPhysicalInputAttributes() const { | 1937 | u32 GetNumPhysicalInputAttributes() const { |
| 1930 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 1938 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1931 | } | 1939 | } |
| 1932 | 1940 | ||
| 1933 | u32 GetNumPhysicalAttributes() const { | 1941 | u32 GetNumPhysicalAttributes() const { |
| @@ -1940,7 +1948,7 @@ private: | |||
| 1940 | 1948 | ||
| 1941 | const Device& device; | 1949 | const Device& device; |
| 1942 | const ShaderIR& ir; | 1950 | const ShaderIR& ir; |
| 1943 | const ShaderStage stage; | 1951 | const ProgramType stage; |
| 1944 | const std::string suffix; | 1952 | const std::string suffix; |
| 1945 | const Header header; | 1953 | const Header header; |
| 1946 | 1954 | ||
| @@ -1971,7 +1979,7 @@ std::string GetCommonDeclarations() { | |||
| 1971 | MAX_CONSTBUFFER_ELEMENTS); | 1979 | MAX_CONSTBUFFER_ELEMENTS); |
| 1972 | } | 1980 | } |
| 1973 | 1981 | ||
| 1974 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 1982 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1975 | const std::string& suffix) { | 1983 | const std::string& suffix) { |
| 1976 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 1984 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1977 | decompiler.Decompile(); | 1985 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02586736d..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -85,6 +97,6 @@ struct ShaderEntries { | |||
| 85 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 86 | 98 | ||
| 87 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 88 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 89 | 101 | ||
| 90 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 7893d1e26..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | 22 | ||
| 24 | namespace Core { | 23 | namespace Core { |
| @@ -34,14 +33,11 @@ namespace OpenGL { | |||
| 34 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 35 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 36 | 35 | ||
| 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 38 | |||
| 39 | using ProgramCode = std::vector<u64>; | 36 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | 38 | using TextureBufferUsage = std::bitset<64>; |
| 43 | 39 | ||
| 44 | /// Allocated bindings used by an OpenGL shader program. | 40 | /// Allocated bindings used by an OpenGL shader program |
| 45 | struct BaseBindings { | 41 | struct BaseBindings { |
| 46 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 47 | u32 gmem{}; | 43 | u32 gmem{}; |
| @@ -126,7 +122,7 @@ namespace OpenGL { | |||
| 126 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 128 | public: | 124 | public: |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 130 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 131 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 132 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -141,30 +137,13 @@ public: | |||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 144 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 145 | } | 141 | } |
| 146 | 142 | ||
| 147 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 148 | return program_type; | 144 | return program_type; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 152 | switch (program_type) { | ||
| 153 | case Maxwell::ShaderProgram::VertexA: | ||
| 154 | case Maxwell::ShaderProgram::VertexB: | ||
| 155 | return Maxwell::ShaderStage::Vertex; | ||
| 156 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 157 | return Maxwell::ShaderStage::TesselationControl; | ||
| 158 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 159 | return Maxwell::ShaderStage::TesselationEval; | ||
| 160 | case Maxwell::ShaderProgram::Geometry: | ||
| 161 | return Maxwell::ShaderStage::Geometry; | ||
| 162 | case Maxwell::ShaderProgram::Fragment: | ||
| 163 | return Maxwell::ShaderStage::Fragment; | ||
| 164 | } | ||
| 165 | UNREACHABLE(); | ||
| 166 | } | ||
| 167 | |||
| 168 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 169 | return program_code; | 148 | return program_code; |
| 170 | } | 149 | } |
| @@ -175,7 +154,7 @@ public: | |||
| 175 | 154 | ||
| 176 | private: | 155 | private: |
| 177 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 178 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 179 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 180 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 181 | 160 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f9ee8429e..de72570d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 82 | |||
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 84 | ProgramResult program = | 84 | ProgramResult program = |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | 85 | Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 86 | out += program.first; | 86 | out += program.first; |
| 87 | 87 | ||
| 88 | out += R"( | 88 | out += R"( |
| @@ -116,9 +116,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 116 | 116 | ||
| 117 | )"; | 117 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 119 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 120 | out += program.first; |
| 123 | 121 | ||
| 124 | out += R"( | 122 | out += R"( |
| @@ -130,4 +128,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 128 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 129 | } |
| 132 | 130 | ||
| 131 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 132 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 133 | |||
| 134 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 135 | out += GetCommonDeclarations(); | ||
| 136 | |||
| 137 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 138 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 139 | out += program.first; | ||
| 140 | |||
| 141 | out += R"( | ||
| 142 | void main() { | ||
| 143 | execute_compute(); | ||
| 144 | } | ||
| 145 | )"; | ||
| 146 | return {std::move(out), std::move(program.second)}; | ||
| 147 | } | ||
| 148 | |||
| 133 | } // namespace OpenGL::GLShader | 149 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 7cbc590f8..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 54 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 56 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 57 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..bab7d1a49 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | constexpr const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 23 | return "compute"; | ||
| 25 | default: | 24 | default: |
| 26 | UNREACHABLE(); | 25 | UNREACHABLE(); |
| 27 | } | 26 | } |
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |