diff options
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 99 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 150 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 74 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 33 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 39 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 24 |
15 files changed, 357 insertions, 140 deletions
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e3d5fb8a9..08586d33c 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void KeplerCompute::ProcessLaunch() { | 52 | void KeplerCompute::ProcessLaunch() { |
| 53 | |||
| 54 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 53 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 55 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 54 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
| 56 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | 55 | LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); |
| 57 | 56 | ||
| 58 | const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | 57 | const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; |
| 59 | LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | 58 | LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); |
| 59 | |||
| 60 | rasterizer.DispatchCompute(code_addr); | ||
| 60 | } | 61 | } |
| 61 | 62 | ||
| 62 | } // namespace Tegra::Engines | 63 | } // namespace Tegra::Engines |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 1b4975498..e25754e37 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
| 50 | return *maxwell_3d; | 50 | return *maxwell_3d; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | Engines::KeplerCompute& GPU::KeplerCompute() { | ||
| 54 | return *kepler_compute; | ||
| 55 | } | ||
| 56 | |||
| 57 | const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||
| 58 | return *kepler_compute; | ||
| 59 | } | ||
| 60 | |||
| 53 | MemoryManager& GPU::MemoryManager() { | 61 | MemoryManager& GPU::MemoryManager() { |
| 54 | return *memory_manager; | 62 | return *memory_manager; |
| 55 | } | 63 | } |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..0ace0ff4f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,6 +155,12 @@ public: | |||
| 155 | /// Returns a const reference to the Maxwell3D GPU engine. | 155 | /// Returns a const reference to the Maxwell3D GPU engine. |
| 156 | const Engines::Maxwell3D& Maxwell3D() const; | 156 | const Engines::Maxwell3D& Maxwell3D() const; |
| 157 | 157 | ||
| 158 | /// Returns a reference to the KeplerCompute GPU engine. | ||
| 159 | Engines::KeplerCompute& KeplerCompute(); | ||
| 160 | |||
| 161 | /// Returns a const reference to the KeplerCompute GPU engine. | ||
| 162 | const Engines::KeplerCompute& KeplerCompute() const; | ||
| 163 | |||
| 158 | /// Returns a reference to the GPU memory manager. | 164 | /// Returns a reference to the GPU memory manager. |
| 159 | Tegra::MemoryManager& MemoryManager(); | 165 | Tegra::MemoryManager& MemoryManager(); |
| 160 | 166 | ||
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2b7367568..9881df0d5 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -34,6 +34,9 @@ public: | |||
| 34 | /// Clear the current framebuffer | 34 | /// Clear the current framebuffer |
| 35 | virtual void Clear() = 0; | 35 | virtual void Clear() = 0; |
| 36 | 36 | ||
| 37 | /// Dispatches a compute shader invocation | ||
| 38 | virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||
| 39 | |||
| 37 | /// Notify rasterizer that all caches should be flushed to Switch memory | 40 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 38 | virtual void FlushAll() = 0; | 41 | virtual void FlushAll() = 0; |
| 39 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 0432a9e10..c59e687b6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <bitset> | ||
| 7 | #include <memory> | 8 | #include <memory> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <string_view> | 10 | #include <string_view> |
| @@ -19,6 +20,7 @@ | |||
| 19 | #include "core/core.h" | 20 | #include "core/core.h" |
| 20 | #include "core/hle/kernel/process.h" | 21 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 22 | #include "core/settings.h" |
| 23 | #include "video_core/engines/kepler_compute.h" | ||
| 22 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 26 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -326,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 326 | 328 | ||
| 327 | Shader shader{shader_cache.GetStageProgram(program)}; | 329 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 328 | 330 | ||
| 329 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | 331 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| 330 | SetupDrawConstBuffers(stage_enum, shader); | 332 | SetupDrawConstBuffers(stage_enum, shader); |
| 331 | SetupGlobalRegions(stage_enum, shader); | 333 | SetupDrawGlobalMemory(stage_enum, shader); |
| 332 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | 334 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; |
| 333 | 335 | ||
| 334 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | 336 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; |
| @@ -783,6 +785,45 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 783 | gpu.dirty.memory_general = false; | 785 | gpu.dirty.memory_general = false; |
| 784 | } | 786 | } |
| 785 | 787 | ||
| 788 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||
| 789 | if (!GLAD_GL_ARB_compute_variable_group_size) { | ||
| 790 | LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||
| 791 | "lack of GL_ARB_compute_variable_group_size"); | ||
| 792 | return; | ||
| 793 | } | ||
| 794 | |||
| 795 | auto kernel = shader_cache.GetComputeKernel(code_addr); | ||
| 796 | const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||
| 797 | state.draw.shader_program = program; | ||
| 798 | state.draw.program_pipeline = 0; | ||
| 799 | |||
| 800 | const std::size_t buffer_size = | ||
| 801 | Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 802 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 803 | buffer_cache.Map(buffer_size); | ||
| 804 | |||
| 805 | bind_ubo_pushbuffer.Setup(0); | ||
| 806 | bind_ssbo_pushbuffer.Setup(0); | ||
| 807 | |||
| 808 | SetupComputeConstBuffers(kernel); | ||
| 809 | SetupComputeGlobalMemory(kernel); | ||
| 810 | |||
| 811 | // TODO(Rodrigo): Bind images and samplers | ||
| 812 | |||
| 813 | buffer_cache.Unmap(); | ||
| 814 | |||
| 815 | bind_ubo_pushbuffer.Bind(); | ||
| 816 | bind_ssbo_pushbuffer.Bind(); | ||
| 817 | |||
| 818 | state.ApplyShaderProgram(); | ||
| 819 | state.ApplyProgramPipeline(); | ||
| 820 | |||
| 821 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 822 | glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||
| 823 | launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||
| 824 | launch_desc.block_dim_y, launch_desc.block_dim_z); | ||
| 825 | } | ||
| 826 | |||
| 786 | void RasterizerOpenGL::FlushAll() {} | 827 | void RasterizerOpenGL::FlushAll() {} |
| 787 | 828 | ||
| 788 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | 829 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { |
| @@ -856,12 +897,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 856 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 897 | void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 857 | const Shader& shader) { | 898 | const Shader& shader) { |
| 858 | MICROPROFILE_SCOPE(OpenGL_UBO); | 899 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 859 | const auto stage_index = static_cast<std::size_t>(stage); | 900 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 860 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 901 | const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; |
| 861 | |||
| 862 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | ||
| 863 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 902 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 864 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 903 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 904 | SetupConstBuffer(buffer, entry); | ||
| 905 | } | ||
| 906 | } | ||
| 907 | |||
| 908 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | ||
| 909 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 910 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 911 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | ||
| 912 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 913 | const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||
| 914 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 915 | buffer.address = config.Address(); | ||
| 916 | buffer.size = config.size; | ||
| 917 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 918 | SetupConstBuffer(buffer, entry); | ||
| 865 | } | 919 | } |
| 866 | } | 920 | } |
| 867 | 921 | ||
| @@ -882,24 +936,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 882 | bind_ubo_pushbuffer.Push(cbuf, offset, size); | 936 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 883 | } | 937 | } |
| 884 | 938 | ||
| 885 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 939 | void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 886 | const Shader& shader) { | 940 | const Shader& shader) { |
| 887 | auto& gpu{system.GPU()}; | 941 | auto& gpu{system.GPU()}; |
| 888 | auto& memory_manager{gpu.MemoryManager()}; | 942 | auto& memory_manager{gpu.MemoryManager()}; |
| 889 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | 943 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 890 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 891 | |||
| 892 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 944 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 893 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 945 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 894 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | 946 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 895 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 947 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 948 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 949 | } | ||
| 950 | } | ||
| 896 | 951 | ||
| 897 | const auto [ssbo, buffer_offset] = | 952 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { |
| 898 | buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | 953 | auto& gpu{system.GPU()}; |
| 899 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | 954 | auto& memory_manager{gpu.MemoryManager()}; |
| 955 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||
| 956 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||
| 957 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 958 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||
| 959 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 960 | SetupGlobalMemory(entry, gpu_addr, size); | ||
| 900 | } | 961 | } |
| 901 | } | 962 | } |
| 902 | 963 | ||
| 964 | void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||
| 965 | GPUVAddr gpu_addr, std::size_t size) { | ||
| 966 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 967 | const auto [ssbo, buffer_offset] = | ||
| 968 | buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | ||
| 969 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 970 | } | ||
| 971 | |||
| 903 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 972 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 904 | BaseBindings base_bindings) { | 973 | BaseBindings base_bindings) { |
| 905 | MICROPROFILE_SCOPE(OpenGL_Texture); | 974 | MICROPROFILE_SCOPE(OpenGL_Texture); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ef34d3f54..8b123c48d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -58,6 +58,7 @@ public: | |||
| 58 | 58 | ||
| 59 | void DrawArrays() override; | 59 | void DrawArrays() override; |
| 60 | void Clear() override; | 60 | void Clear() override; |
| 61 | void DispatchCompute(GPUVAddr code_addr) override; | ||
| 61 | void FlushAll() override; | 62 | void FlushAll() override; |
| 62 | void FlushRegion(CacheAddr addr, u64 size) override; | 63 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 63 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| @@ -115,13 +116,23 @@ private: | |||
| 115 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 116 | void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 116 | const Shader& shader); | 117 | const Shader& shader); |
| 117 | 118 | ||
| 119 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 120 | void SetupComputeConstBuffers(const Shader& kernel); | ||
| 121 | |||
| 118 | /// Configures a constant buffer. | 122 | /// Configures a constant buffer. |
| 119 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | 123 | void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, |
| 120 | const GLShader::ConstBufferEntry& entry); | 124 | const GLShader::ConstBufferEntry& entry); |
| 121 | 125 | ||
| 122 | /// Configures the current global memory entries to use for the draw command. | 126 | /// Configures the current global memory entries to use for the draw command. |
| 123 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 127 | void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 124 | const Shader& shader); | 128 | const Shader& shader); |
| 129 | |||
| 130 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 131 | void SetupComputeGlobalMemory(const Shader& kernel); | ||
| 132 | |||
| 133 | /// Configures a global memory buffer. | ||
| 134 | void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 135 | std::size_t size); | ||
| 125 | 136 | ||
| 126 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer | 137 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 127 | /// usage. | 138 | /// usage. |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 456ba0403..1c90facc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -23,13 +23,13 @@ namespace OpenGL { | |||
| 23 | 23 | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 24 | using VideoCommon::Shader::ProgramCode; |
| 25 | 25 | ||
| 26 | // One UBO is always reserved for emulation values | 26 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 RESERVED_UBOS = 1; | 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 28 | ||
| 29 | struct UnspecializedShader { | 29 | struct UnspecializedShader { |
| 30 | std::string code; | 30 | std::string code; |
| 31 | GLShader::ShaderEntries entries; | 31 | GLShader::ShaderEntries entries; |
| 32 | Maxwell::ShaderProgram program_type; | 32 | ProgramType program_type; |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | namespace { | 35 | namespace { |
| @@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /// Gets the shader type from a Maxwell program type | 57 | /// Gets the shader type from a Maxwell program type |
| 58 | constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | 58 | constexpr GLenum GetShaderType(ProgramType program_type) { |
| 59 | switch (program_type) { | 59 | switch (program_type) { |
| 60 | case Maxwell::ShaderProgram::VertexA: | 60 | case ProgramType::VertexA: |
| 61 | case Maxwell::ShaderProgram::VertexB: | 61 | case ProgramType::VertexB: |
| 62 | return GL_VERTEX_SHADER; | 62 | return GL_VERTEX_SHADER; |
| 63 | case Maxwell::ShaderProgram::Geometry: | 63 | case ProgramType::Geometry: |
| 64 | return GL_GEOMETRY_SHADER; | 64 | return GL_GEOMETRY_SHADER; |
| 65 | case Maxwell::ShaderProgram::Fragment: | 65 | case ProgramType::Fragment: |
| 66 | return GL_FRAGMENT_SHADER; | 66 | return GL_FRAGMENT_SHADER; |
| 67 | case ProgramType::Compute: | ||
| 68 | return GL_COMPUTE_SHADER; | ||
| 67 | default: | 69 | default: |
| 68 | return GL_NONE; | 70 | return GL_NONE; |
| 69 | } | 71 | } |
| @@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 100 | } | 102 | } |
| 101 | } | 103 | } |
| 102 | 104 | ||
| 105 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 106 | switch (program) { | ||
| 107 | case Maxwell::ShaderProgram::VertexA: | ||
| 108 | return ProgramType::VertexA; | ||
| 109 | case Maxwell::ShaderProgram::VertexB: | ||
| 110 | return ProgramType::VertexB; | ||
| 111 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 112 | return ProgramType::TessellationControl; | ||
| 113 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 114 | return ProgramType::TessellationEval; | ||
| 115 | case Maxwell::ShaderProgram::Geometry: | ||
| 116 | return ProgramType::Geometry; | ||
| 117 | case Maxwell::ShaderProgram::Fragment: | ||
| 118 | return ProgramType::Fragment; | ||
| 119 | } | ||
| 120 | UNREACHABLE(); | ||
| 121 | return {}; | ||
| 122 | } | ||
| 123 | |||
| 103 | /// Calculates the size of a program stream | 124 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 126 | constexpr std::size_t start_offset = 10; |
| @@ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 128 | } | 149 | } |
| 129 | 150 | ||
| 130 | /// Hashes one (or two) program streams | 151 | /// Hashes one (or two) program streams |
| 131 | u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 132 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |
| 133 | if (size_a == 0) { | 154 | if (size_a == 0) { |
| 134 | size_a = CalculateProgramSize(code); | 155 | size_a = CalculateProgramSize(code); |
| 135 | } | 156 | } |
| 136 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); |
| 137 | if (program_type != Maxwell::ShaderProgram::VertexA) { | 158 | if (program_type != ProgramType::VertexA) { |
| 138 | return unique_identifier; | 159 | return unique_identifier; |
| 139 | } | 160 | } |
| 140 | // VertexA programs include two programs | 161 | // VertexA programs include two programs |
| @@ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| 152 | } | 173 | } |
| 153 | 174 | ||
| 154 | /// Creates an unspecialized program from code streams | 175 | /// Creates an unspecialized program from code streams |
| 155 | GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, |
| 156 | ProgramCode program_code, ProgramCode program_code_b) { | 177 | ProgramCode program_code, ProgramCode program_code_b) { |
| 157 | GLShader::ShaderSetup setup(program_code); | 178 | GLShader::ShaderSetup setup(program_code); |
| 158 | setup.program.size_a = CalculateProgramSize(program_code); | 179 | setup.program.size_a = CalculateProgramSize(program_code); |
| 159 | setup.program.size_b = 0; | 180 | setup.program.size_b = 0; |
| 160 | if (program_type == Maxwell::ShaderProgram::VertexA) { | 181 | if (program_type == ProgramType::VertexA) { |
| 161 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |
| 162 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one |
| 163 | // stage here. | 184 | // stage here. |
| @@ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr | |||
| 168 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |
| 169 | 190 | ||
| 170 | switch (program_type) { | 191 | switch (program_type) { |
| 171 | case Maxwell::ShaderProgram::VertexA: | 192 | case ProgramType::VertexA: |
| 172 | case Maxwell::ShaderProgram::VertexB: | 193 | case ProgramType::VertexB: |
| 173 | return GLShader::GenerateVertexShader(device, setup); | 194 | return GLShader::GenerateVertexShader(device, setup); |
| 174 | case Maxwell::ShaderProgram::Geometry: | 195 | case ProgramType::Geometry: |
| 175 | return GLShader::GenerateGeometryShader(device, setup); | 196 | return GLShader::GenerateGeometryShader(device, setup); |
| 176 | case Maxwell::ShaderProgram::Fragment: | 197 | case ProgramType::Fragment: |
| 177 | return GLShader::GenerateFragmentShader(device, setup); | 198 | return GLShader::GenerateFragmentShader(device, setup); |
| 199 | case ProgramType::Compute: | ||
| 200 | return GLShader::GenerateComputeShader(device, setup); | ||
| 178 | default: | 201 | default: |
| 179 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 180 | UNREACHABLE(); | ||
| 181 | return {}; | 203 | return {}; |
| 182 | } | 204 | } |
| 183 | } | 205 | } |
| 184 | 206 | ||
| 185 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 186 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | 208 | ProgramType program_type, const ProgramVariant& variant, |
| 187 | bool hint_retrievable = false) { | 209 | bool hint_retrievable = false) { |
| 188 | auto base_bindings{variant.base_bindings}; | 210 | auto base_bindings{variant.base_bindings}; |
| 189 | const auto primitive_mode{variant.primitive_mode}; | 211 | const auto primitive_mode{variant.primitive_mode}; |
| @@ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 194 | if (entries.shader_viewport_layer_array) { | 216 | if (entries.shader_viewport_layer_array) { |
| 195 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | 217 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; |
| 196 | } | 218 | } |
| 197 | source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 219 | if (program_type == ProgramType::Compute) { |
| 220 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 221 | } | ||
| 222 | source += '\n'; | ||
| 223 | |||
| 224 | if (program_type != ProgramType::Compute) { | ||
| 225 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 226 | } | ||
| 198 | 227 | ||
| 199 | for (const auto& cbuf : entries.const_buffers) { | 228 | for (const auto& cbuf : entries.const_buffers) { |
| 200 | source += | 229 | source += |
| @@ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 221 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | 250 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); |
| 222 | } | 251 | } |
| 223 | 252 | ||
| 224 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 253 | if (program_type == ProgramType::Geometry) { |
| 225 | const auto [glsl_topology, debug_name, max_vertices] = | 254 | const auto [glsl_topology, debug_name, max_vertices] = |
| 226 | GetPrimitiveDescription(primitive_mode); | 255 | GetPrimitiveDescription(primitive_mode); |
| 227 | 256 | ||
| 228 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; | 257 | source += "layout (" + std::string(glsl_topology) + ") in;\n"; |
| 229 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | 258 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; |
| 230 | } | 259 | } |
| 260 | if (program_type == ProgramType::Compute) { | ||
| 261 | source += "layout (local_size_variable) in;\n"; | ||
| 262 | } | ||
| 231 | 263 | ||
| 232 | source += code; | 264 | source += code; |
| 233 | 265 | ||
| @@ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 255 | 287 | ||
| 256 | } // Anonymous namespace | 288 | } // Anonymous namespace |
| 257 | 289 | ||
| 258 | CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 290 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 259 | GLShader::ProgramResult result) | 291 | GLShader::ProgramResult result) |
| 260 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | 292 | : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, |
| 261 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 293 | unique_identifier{params.unique_identifier}, program_type{program_type}, |
| @@ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 268 | ProgramCode&& program_code_b) { | 300 | ProgramCode&& program_code_b) { |
| 269 | const auto code_size{CalculateProgramSize(program_code)}; | 301 | const auto code_size{CalculateProgramSize(program_code)}; |
| 270 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | 302 | const auto code_size_b{CalculateProgramSize(program_code_b)}; |
| 271 | auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | 303 | auto result{ |
| 304 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 272 | if (result.first.empty()) { | 305 | if (result.first.empty()) { |
| 273 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | 306 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |
| 274 | return {}; | 307 | return {}; |
| 275 | } | 308 | } |
| 276 | 309 | ||
| 277 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 310 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 278 | params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | 311 | params.unique_identifier, GetProgramType(program_type), |
| 279 | static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | 312 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |
| 280 | std::move(program_code_b))); | 313 | std::move(program_code), std::move(program_code_b))); |
| 281 | 314 | ||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 315 | return std::shared_ptr<CachedShader>( |
| 316 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 283 | } | 317 | } |
| 284 | 318 | ||
| 285 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 319 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, |
| 286 | Maxwell::ShaderProgram program_type, | 320 | Maxwell::ShaderProgram program_type, |
| 287 | GLShader::ProgramResult result) { | 321 | GLShader::ProgramResult result) { |
| 288 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 322 | return std::shared_ptr<CachedShader>( |
| 323 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 324 | } | ||
| 325 | |||
| 326 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||
| 327 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 328 | |||
| 329 | const auto code_size{CalculateProgramSize(code)}; | ||
| 330 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 331 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 332 | std::move(code), {})); | ||
| 333 | |||
| 334 | return std::shared_ptr<CachedShader>( | ||
| 335 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 336 | } | ||
| 337 | |||
| 338 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||
| 339 | GLShader::ProgramResult result) { | ||
| 340 | return std::shared_ptr<CachedShader>( | ||
| 341 | new CachedShader(params, ProgramType::Compute, std::move(result))); | ||
| 289 | } | 342 | } |
| 290 | 343 | ||
| 291 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 344 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 292 | GLuint handle{}; | 345 | GLuint handle{}; |
| 293 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 346 | if (program_type == ProgramType::Geometry) { |
| 294 | handle = GetGeometryShader(variant); | 347 | handle = GetGeometryShader(variant); |
| 295 | } else { | 348 | } else { |
| 296 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 349 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| @@ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 308 | handle = program->handle; | 361 | handle = program->handle; |
| 309 | } | 362 | } |
| 310 | 363 | ||
| 311 | auto base_bindings{variant.base_bindings}; | 364 | auto base_bindings = variant.base_bindings; |
| 312 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 365 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); |
| 366 | if (program_type != ProgramType::Compute) { | ||
| 367 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 368 | } | ||
| 313 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 369 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 314 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 370 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| 315 | 371 | ||
| @@ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 589 | // No shader found - create a new one | 645 | // No shader found - create a new one |
| 590 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 646 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |
| 591 | ProgramCode program_code_b; | 647 | ProgramCode program_code_b; |
| 592 | if (program == Maxwell::ShaderProgram::VertexA) { | 648 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |
| 649 | if (is_program_a) { | ||
| 593 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 650 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 594 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 651 | program_code_b = GetShaderCode(memory_manager, program_addr_b, |
| 595 | memory_manager.GetPointer(program_addr_b)); | 652 | memory_manager.GetPointer(program_addr_b)); |
| 596 | } | 653 | } |
| 597 | 654 | ||
| 598 | const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 655 | const auto unique_identifier = |
| 656 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||
| 599 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 657 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |
| 600 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 658 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |
| 601 | host_ptr, unique_identifier}; | 659 | host_ptr, unique_identifier}; |
| @@ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 612 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 670 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 613 | } | 671 | } |
| 614 | 672 | ||
| 673 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||
| 674 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 675 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 676 | auto kernel = TryGet(host_ptr); | ||
| 677 | if (kernel) { | ||
| 678 | return kernel; | ||
| 679 | } | ||
| 680 | |||
| 681 | // No kernel found - create a new one | ||
| 682 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||
| 683 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||
| 684 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||
| 685 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||
| 686 | host_ptr, unique_identifier}; | ||
| 687 | |||
| 688 | const auto found = precompiled_shaders.find(unique_identifier); | ||
| 689 | if (found == precompiled_shaders.end()) { | ||
| 690 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||
| 691 | } else { | ||
| 692 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||
| 693 | } | ||
| 694 | |||
| 695 | Register(kernel); | ||
| 696 | return kernel; | ||
| 697 | } | ||
| 698 | |||
| 615 | } // namespace OpenGL | 699 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index bbb53cdf4..a3106a0ff 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -61,6 +61,11 @@ public: | |||
| 61 | Maxwell::ShaderProgram program_type, | 61 | Maxwell::ShaderProgram program_type, |
| 62 | GLShader::ProgramResult result); | 62 | GLShader::ProgramResult result); |
| 63 | 63 | ||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||
| 65 | |||
| 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 67 | GLShader::ProgramResult result); | ||
| 68 | |||
| 64 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 65 | return cpu_addr; | 70 | return cpu_addr; |
| 66 | } | 71 | } |
| @@ -78,7 +83,7 @@ public: | |||
| 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 79 | 84 | ||
| 80 | private: | 85 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 82 | GLShader::ProgramResult result); | 87 | GLShader::ProgramResult result); |
| 83 | 88 | ||
| 84 | // Geometry programs. These are needed because GLSL needs an input topology but it's not | 89 | // Geometry programs. These are needed because GLSL needs an input topology but it's not |
| @@ -104,7 +109,7 @@ private: | |||
| 104 | u8* host_ptr{}; | 109 | u8* host_ptr{}; |
| 105 | VAddr cpu_addr{}; | 110 | VAddr cpu_addr{}; |
| 106 | u64 unique_identifier{}; | 111 | u64 unique_identifier{}; |
| 107 | Maxwell::ShaderProgram program_type{}; | 112 | ProgramType program_type{}; |
| 108 | ShaderDiskCacheOpenGL& disk_cache; | 113 | ShaderDiskCacheOpenGL& disk_cache; |
| 109 | const PrecompiledPrograms& precompiled_programs; | 114 | const PrecompiledPrograms& precompiled_programs; |
| 110 | 115 | ||
| @@ -132,6 +137,9 @@ public: | |||
| 132 | /// Gets the current specified shader stage program | 137 | /// Gets the current specified shader stage program |
| 133 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 138 | Shader GetStageProgram(Maxwell::ShaderProgram program); |
| 134 | 139 | ||
| 140 | /// Gets a compute kernel in the passed address | ||
| 141 | Shader GetComputeKernel(GPUVAddr code_addr); | ||
| 142 | |||
| 135 | protected: | 143 | protected: |
| 136 | // We do not have to flush this cache as things in it are never modified by us. | 144 | // We do not have to flush this cache as things in it are never modified by us. |
| 137 | void FlushObjectInner(const Shader& object) override {} | 145 | void FlushObjectInner(const Shader& object) override {} |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e19d502bc..ffe26b241 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -37,7 +37,6 @@ using namespace std::string_literals; | |||
| 37 | using namespace VideoCommon::Shader; | 37 | using namespace VideoCommon::Shader; |
| 38 | 38 | ||
| 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 40 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 41 | using Operation = const OperationNode&; | 40 | using Operation = const OperationNode&; |
| 42 | 41 | ||
| 43 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 42 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| @@ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 162 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 161 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 163 | } | 162 | } |
| 164 | 163 | ||
| 164 | constexpr bool IsVertexShader(ProgramType stage) { | ||
| 165 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||
| 166 | } | ||
| 167 | |||
| 165 | class GLSLDecompiler final { | 168 | class GLSLDecompiler final { |
| 166 | public: | 169 | public: |
| 167 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | 170 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 168 | std::string suffix) | 171 | std::string suffix) |
| 169 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 172 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 170 | 173 | ||
| @@ -248,21 +251,21 @@ public: | |||
| 248 | } | 251 | } |
| 249 | entries.clip_distances = ir.GetClipDistances(); | 252 | entries.clip_distances = ir.GetClipDistances(); |
| 250 | entries.shader_viewport_layer_array = | 253 | entries.shader_viewport_layer_array = |
| 251 | stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); | 254 | IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); |
| 252 | entries.shader_length = ir.GetLength(); | 255 | entries.shader_length = ir.GetLength(); |
| 253 | return entries; | 256 | return entries; |
| 254 | } | 257 | } |
| 255 | 258 | ||
| 256 | private: | 259 | private: |
| 257 | void DeclareVertex() { | 260 | void DeclareVertex() { |
| 258 | if (stage != ShaderStage::Vertex) | 261 | if (!IsVertexShader(stage)) |
| 259 | return; | 262 | return; |
| 260 | 263 | ||
| 261 | DeclareVertexRedeclarations(); | 264 | DeclareVertexRedeclarations(); |
| 262 | } | 265 | } |
| 263 | 266 | ||
| 264 | void DeclareGeometry() { | 267 | void DeclareGeometry() { |
| 265 | if (stage != ShaderStage::Geometry) { | 268 | if (stage != ProgramType::Geometry) { |
| 266 | return; | 269 | return; |
| 267 | } | 270 | } |
| 268 | 271 | ||
| @@ -293,14 +296,14 @@ private: | |||
| 293 | break; | 296 | break; |
| 294 | } | 297 | } |
| 295 | } | 298 | } |
| 296 | if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { | 299 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { |
| 297 | if (ir.UsesLayer()) { | 300 | if (ir.UsesLayer()) { |
| 298 | code.AddLine("int gl_Layer;"); | 301 | code.AddLine("int gl_Layer;"); |
| 299 | } | 302 | } |
| 300 | if (ir.UsesViewportIndex()) { | 303 | if (ir.UsesViewportIndex()) { |
| 301 | code.AddLine("int gl_ViewportIndex;"); | 304 | code.AddLine("int gl_ViewportIndex;"); |
| 302 | } | 305 | } |
| 303 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && | 306 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && |
| 304 | !device.HasVertexViewportLayer()) { | 307 | !device.HasVertexViewportLayer()) { |
| 305 | LOG_ERROR( | 308 | LOG_ERROR( |
| 306 | Render_OpenGL, | 309 | Render_OpenGL, |
| @@ -337,11 +340,16 @@ private: | |||
| 337 | } | 340 | } |
| 338 | 341 | ||
| 339 | void DeclareLocalMemory() { | 342 | void DeclareLocalMemory() { |
| 340 | if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | 343 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at |
| 341 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 344 | // specialization time. |
| 342 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | 345 | const u64 local_memory_size = |
| 343 | code.AddNewLine(); | 346 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); |
| 347 | if (local_memory_size == 0) { | ||
| 348 | return; | ||
| 344 | } | 349 | } |
| 350 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||
| 351 | code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | ||
| 352 | code.AddNewLine(); | ||
| 345 | } | 353 | } |
| 346 | 354 | ||
| 347 | void DeclareInternalFlags() { | 355 | void DeclareInternalFlags() { |
| @@ -395,12 +403,12 @@ private: | |||
| 395 | const u32 location{GetGenericAttributeIndex(index)}; | 403 | const u32 location{GetGenericAttributeIndex(index)}; |
| 396 | 404 | ||
| 397 | std::string name{GetInputAttribute(index)}; | 405 | std::string name{GetInputAttribute(index)}; |
| 398 | if (stage == ShaderStage::Geometry) { | 406 | if (stage == ProgramType::Geometry) { |
| 399 | name = "gs_" + name + "[]"; | 407 | name = "gs_" + name + "[]"; |
| 400 | } | 408 | } |
| 401 | 409 | ||
| 402 | std::string suffix; | 410 | std::string suffix; |
| 403 | if (stage == ShaderStage::Fragment) { | 411 | if (stage == ProgramType::Fragment) { |
| 404 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 412 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 405 | if (skip_unused && input_mode == AttributeUse::Unused) { | 413 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 406 | return; | 414 | return; |
| @@ -412,7 +420,7 @@ private: | |||
| 412 | } | 420 | } |
| 413 | 421 | ||
| 414 | void DeclareOutputAttributes() { | 422 | void DeclareOutputAttributes() { |
| 415 | if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | 423 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { |
| 416 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 424 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 417 | DeclareOutputAttribute(ToGenericAttribute(i)); | 425 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 418 | } | 426 | } |
| @@ -534,7 +542,7 @@ private: | |||
| 534 | constexpr u32 element_stride{4}; | 542 | constexpr u32 element_stride{4}; |
| 535 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 543 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 536 | 544 | ||
| 537 | const bool declared{stage != ShaderStage::Fragment || | 545 | const bool declared{stage != ProgramType::Fragment || |
| 538 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | 546 | header.ps.GetAttributeUse(index) != AttributeUse::Unused}; |
| 539 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | 547 | const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; |
| 540 | code.AddLine("case 0x{:x}: return {};", address, value); | 548 | code.AddLine("case 0x{:x}: return {};", address, value); |
| @@ -638,7 +646,7 @@ private: | |||
| 638 | } | 646 | } |
| 639 | 647 | ||
| 640 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 648 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 641 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | 649 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, |
| 642 | "Physical attributes in geometry shaders are not implemented"); | 650 | "Physical attributes in geometry shaders are not implemented"); |
| 643 | if (abuf->IsPhysicalBuffer()) { | 651 | if (abuf->IsPhysicalBuffer()) { |
| 644 | return fmt::format("readPhysicalAttribute(ftou({}))", | 652 | return fmt::format("readPhysicalAttribute(ftou({}))", |
| @@ -693,6 +701,9 @@ private: | |||
| 693 | } | 701 | } |
| 694 | 702 | ||
| 695 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 703 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 704 | if (stage == ProgramType::Compute) { | ||
| 705 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 706 | } | ||
| 696 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 707 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 697 | } | 708 | } |
| 698 | 709 | ||
| @@ -722,7 +733,7 @@ private: | |||
| 722 | 733 | ||
| 723 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 734 | std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 724 | const auto GeometryPass = [&](std::string_view name) { | 735 | const auto GeometryPass = [&](std::string_view name) { |
| 725 | if (stage == ShaderStage::Geometry && buffer) { | 736 | if (stage == ProgramType::Geometry && buffer) { |
| 726 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 737 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 727 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 738 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 728 | // this happens and what's its intent. | 739 | // this happens and what's its intent. |
| @@ -734,10 +745,10 @@ private: | |||
| 734 | switch (attribute) { | 745 | switch (attribute) { |
| 735 | case Attribute::Index::Position: | 746 | case Attribute::Index::Position: |
| 736 | switch (stage) { | 747 | switch (stage) { |
| 737 | case ShaderStage::Geometry: | 748 | case ProgramType::Geometry: |
| 738 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | 749 | return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), |
| 739 | GetSwizzle(element)); | 750 | GetSwizzle(element)); |
| 740 | case ShaderStage::Fragment: | 751 | case ProgramType::Fragment: |
| 741 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | 752 | return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); |
| 742 | default: | 753 | default: |
| 743 | UNREACHABLE(); | 754 | UNREACHABLE(); |
| @@ -758,7 +769,7 @@ private: | |||
| 758 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 769 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 759 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 770 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 760 | // shader. | 771 | // shader. |
| 761 | ASSERT(stage == ShaderStage::Vertex); | 772 | ASSERT(IsVertexShader(stage)); |
| 762 | switch (element) { | 773 | switch (element) { |
| 763 | case 2: | 774 | case 2: |
| 764 | // Config pack's first value is instance_id. | 775 | // Config pack's first value is instance_id. |
| @@ -770,7 +781,7 @@ private: | |||
| 770 | return "0"; | 781 | return "0"; |
| 771 | case Attribute::Index::FrontFacing: | 782 | case Attribute::Index::FrontFacing: |
| 772 | // TODO(Subv): Find out what the values are for the other elements. | 783 | // TODO(Subv): Find out what the values are for the other elements. |
| 773 | ASSERT(stage == ShaderStage::Fragment); | 784 | ASSERT(stage == ProgramType::Fragment); |
| 774 | switch (element) { | 785 | switch (element) { |
| 775 | case 3: | 786 | case 3: |
| 776 | return "itof(gl_FrontFacing ? -1 : 0)"; | 787 | return "itof(gl_FrontFacing ? -1 : 0)"; |
| @@ -792,7 +803,7 @@ private: | |||
| 792 | return value; | 803 | return value; |
| 793 | } | 804 | } |
| 794 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders | 805 | // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders |
| 795 | const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | 806 | const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; |
| 796 | 807 | ||
| 797 | const std::string temporary = code.GenerateTemporary(); | 808 | const std::string temporary = code.GenerateTemporary(); |
| 798 | code.AddLine("{}float {} = {};", precise, temporary, value); | 809 | code.AddLine("{}float {} = {};", precise, temporary, value); |
| @@ -827,12 +838,12 @@ private: | |||
| 827 | UNIMPLEMENTED(); | 838 | UNIMPLEMENTED(); |
| 828 | return {}; | 839 | return {}; |
| 829 | case 1: | 840 | case 1: |
| 830 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 841 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 831 | return {}; | 842 | return {}; |
| 832 | } | 843 | } |
| 833 | return std::make_pair("gl_Layer", true); | 844 | return std::make_pair("gl_Layer", true); |
| 834 | case 2: | 845 | case 2: |
| 835 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | 846 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { |
| 836 | return {}; | 847 | return {}; |
| 837 | } | 848 | } |
| 838 | return std::make_pair("gl_ViewportIndex", true); | 849 | return std::make_pair("gl_ViewportIndex", true); |
| @@ -1069,6 +1080,9 @@ private: | |||
| 1069 | target = result->first; | 1080 | target = result->first; |
| 1070 | is_integer = result->second; | 1081 | is_integer = result->second; |
| 1071 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1082 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1083 | if (stage == ProgramType::Compute) { | ||
| 1084 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1085 | } | ||
| 1072 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1086 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1073 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1087 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1074 | const std::string real = Visit(gmem->GetRealAddress()); | 1088 | const std::string real = Visit(gmem->GetRealAddress()); |
| @@ -1622,7 +1636,7 @@ private: | |||
| 1622 | } | 1636 | } |
| 1623 | 1637 | ||
| 1624 | std::string Exit(Operation operation) { | 1638 | std::string Exit(Operation operation) { |
| 1625 | if (stage != ShaderStage::Fragment) { | 1639 | if (stage != ProgramType::Fragment) { |
| 1626 | code.AddLine("return;"); | 1640 | code.AddLine("return;"); |
| 1627 | return {}; | 1641 | return {}; |
| 1628 | } | 1642 | } |
| @@ -1673,7 +1687,7 @@ private: | |||
| 1673 | } | 1687 | } |
| 1674 | 1688 | ||
| 1675 | std::string EmitVertex(Operation operation) { | 1689 | std::string EmitVertex(Operation operation) { |
| 1676 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1690 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1677 | "EmitVertex is expected to be used in a geometry shader."); | 1691 | "EmitVertex is expected to be used in a geometry shader."); |
| 1678 | 1692 | ||
| 1679 | // If a geometry shader is attached, it will always flip (it's the last stage before | 1693 | // If a geometry shader is attached, it will always flip (it's the last stage before |
| @@ -1684,7 +1698,7 @@ private: | |||
| 1684 | } | 1698 | } |
| 1685 | 1699 | ||
| 1686 | std::string EndPrimitive(Operation operation) { | 1700 | std::string EndPrimitive(Operation operation) { |
| 1687 | ASSERT_MSG(stage == ShaderStage::Geometry, | 1701 | ASSERT_MSG(stage == ProgramType::Geometry, |
| 1688 | "EndPrimitive is expected to be used in a geometry shader."); | 1702 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1689 | 1703 | ||
| 1690 | code.AddLine("EndPrimitive();"); | 1704 | code.AddLine("EndPrimitive();"); |
| @@ -1919,7 +1933,7 @@ private: | |||
| 1919 | } | 1933 | } |
| 1920 | 1934 | ||
| 1921 | u32 GetNumPhysicalInputAttributes() const { | 1935 | u32 GetNumPhysicalInputAttributes() const { |
| 1922 | return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 1936 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 1923 | } | 1937 | } |
| 1924 | 1938 | ||
| 1925 | u32 GetNumPhysicalAttributes() const { | 1939 | u32 GetNumPhysicalAttributes() const { |
| @@ -1932,7 +1946,7 @@ private: | |||
| 1932 | 1946 | ||
| 1933 | const Device& device; | 1947 | const Device& device; |
| 1934 | const ShaderIR& ir; | 1948 | const ShaderIR& ir; |
| 1935 | const ShaderStage stage; | 1949 | const ProgramType stage; |
| 1936 | const std::string suffix; | 1950 | const std::string suffix; |
| 1937 | const Header header; | 1951 | const Header header; |
| 1938 | 1952 | ||
| @@ -1963,7 +1977,7 @@ std::string GetCommonDeclarations() { | |||
| 1963 | MAX_CONSTBUFFER_ELEMENTS); | 1977 | MAX_CONSTBUFFER_ELEMENTS); |
| 1964 | } | 1978 | } |
| 1965 | 1979 | ||
| 1966 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | 1980 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 1967 | const std::string& suffix) { | 1981 | const std::string& suffix) { |
| 1968 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 1982 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 1969 | decompiler.Decompile(); | 1983 | decompiler.Decompile(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 02586736d..2ea02f5bf 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -12,14 +12,26 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/shader/shader_ir.h" | 13 | #include "video_core/shader/shader_ir.h" |
| 14 | 14 | ||
| 15 | namespace OpenGL { | ||
| 16 | class Device; | ||
| 17 | } | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 20 | class ShaderIR; | 16 | class ShaderIR; |
| 21 | } | 17 | } |
| 22 | 18 | ||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | |||
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | |||
| 23 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 24 | 36 | ||
| 25 | struct ShaderEntries; | 37 | struct ShaderEntries; |
| @@ -85,6 +97,6 @@ struct ShaderEntries { | |||
| 85 | std::string GetCommonDeclarations(); | 97 | std::string GetCommonDeclarations(); |
| 86 | 98 | ||
| 87 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 99 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 88 | Maxwell::ShaderStage stage, const std::string& suffix); | 100 | ProgramType stage, const std::string& suffix); |
| 89 | 101 | ||
| 90 | } // namespace OpenGL::GLShader | 102 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 7893d1e26..969fe9ced 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 51 | 51 | ||
| 52 | } // namespace | 52 | } // namespace |
| 53 | 53 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | 55 | u32 program_code_size, u32 program_code_size_b, |
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 56 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 57 | : unique_identifier{unique_identifier}, program_type{program_type}, |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 4f296dda6..cc8bbd61e 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | 22 | ||
| 24 | namespace Core { | 23 | namespace Core { |
| @@ -34,14 +33,11 @@ namespace OpenGL { | |||
| 34 | struct ShaderDiskCacheUsage; | 33 | struct ShaderDiskCacheUsage; |
| 35 | struct ShaderDiskCacheDump; | 34 | struct ShaderDiskCacheDump; |
| 36 | 35 | ||
| 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 38 | |||
| 39 | using ProgramCode = std::vector<u64>; | 36 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | 38 | using TextureBufferUsage = std::bitset<64>; |
| 43 | 39 | ||
| 44 | /// Allocated bindings used by an OpenGL shader program. | 40 | /// Allocated bindings used by an OpenGL shader program |
| 45 | struct BaseBindings { | 41 | struct BaseBindings { |
| 46 | u32 cbuf{}; | 42 | u32 cbuf{}; |
| 47 | u32 gmem{}; | 43 | u32 gmem{}; |
| @@ -126,7 +122,7 @@ namespace OpenGL { | |||
| 126 | /// Describes a shader how it's used by the guest GPU | 122 | /// Describes a shader how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 123 | class ShaderDiskCacheRaw { |
| 128 | public: | 124 | public: |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 130 | u32 program_code_size, u32 program_code_size_b, | 126 | u32 program_code_size, u32 program_code_size_b, |
| 131 | ProgramCode program_code, ProgramCode program_code_b); | 127 | ProgramCode program_code, ProgramCode program_code_b); |
| 132 | ShaderDiskCacheRaw(); | 128 | ShaderDiskCacheRaw(); |
| @@ -141,30 +137,13 @@ public: | |||
| 141 | } | 137 | } |
| 142 | 138 | ||
| 143 | bool HasProgramA() const { | 139 | bool HasProgramA() const { |
| 144 | return program_type == Maxwell::ShaderProgram::VertexA; | 140 | return program_type == ProgramType::VertexA; |
| 145 | } | 141 | } |
| 146 | 142 | ||
| 147 | Maxwell::ShaderProgram GetProgramType() const { | 143 | ProgramType GetProgramType() const { |
| 148 | return program_type; | 144 | return program_type; |
| 149 | } | 145 | } |
| 150 | 146 | ||
| 151 | Maxwell::ShaderStage GetProgramStage() const { | ||
| 152 | switch (program_type) { | ||
| 153 | case Maxwell::ShaderProgram::VertexA: | ||
| 154 | case Maxwell::ShaderProgram::VertexB: | ||
| 155 | return Maxwell::ShaderStage::Vertex; | ||
| 156 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 157 | return Maxwell::ShaderStage::TesselationControl; | ||
| 158 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 159 | return Maxwell::ShaderStage::TesselationEval; | ||
| 160 | case Maxwell::ShaderProgram::Geometry: | ||
| 161 | return Maxwell::ShaderStage::Geometry; | ||
| 162 | case Maxwell::ShaderProgram::Fragment: | ||
| 163 | return Maxwell::ShaderStage::Fragment; | ||
| 164 | } | ||
| 165 | UNREACHABLE(); | ||
| 166 | } | ||
| 167 | |||
| 168 | const ProgramCode& GetProgramCode() const { | 147 | const ProgramCode& GetProgramCode() const { |
| 169 | return program_code; | 148 | return program_code; |
| 170 | } | 149 | } |
| @@ -175,7 +154,7 @@ public: | |||
| 175 | 154 | ||
| 176 | private: | 155 | private: |
| 177 | u64 unique_identifier{}; | 156 | u64 unique_identifier{}; |
| 178 | Maxwell::ShaderProgram program_type{}; | 157 | ProgramType program_type{}; |
| 179 | u32 program_code_size{}; | 158 | u32 program_code_size{}; |
| 180 | u32 program_code_size_b{}; | 159 | u32 program_code_size_b{}; |
| 181 | 160 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f9ee8429e..3a8d9e1da 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| 14 | using VideoCommon::Shader::ProgramCode; | 14 | using VideoCommon::Shader::ProgramCode; |
| 15 | using VideoCommon::Shader::ShaderIR; | 15 | using VideoCommon::Shader::ShaderIR; |
| 16 | 16 | ||
| 17 | static constexpr u32 PROGRAM_OFFSET{10}; | 17 | static constexpr u32 PROGRAM_OFFSET = 10; |
| 18 | static constexpr u32 COMPUTE_OFFSET = 0; | ||
| 18 | 19 | ||
| 19 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | 20 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |
| 20 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 21 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |
| @@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 29 | }; | 30 | }; |
| 30 | 31 | ||
| 31 | )"; | 32 | )"; |
| 32 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 33 | ProgramResult program = | ||
| 34 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||
| 35 | 33 | ||
| 34 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||
| 35 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||
| 36 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||
| 36 | out += program.first; | 37 | out += program.first; |
| 37 | 38 | ||
| 38 | if (setup.IsDualProgram()) { | 39 | if (setup.IsDualProgram()) { |
| 39 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | 40 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); |
| 40 | ProgramResult program_b = | 41 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||
| 42 | |||
| 43 | out += program_b.first; | 42 | out += program_b.first; |
| 44 | } | 43 | } |
| 45 | 44 | ||
| @@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | )"; | 81 | )"; |
| 82 | |||
| 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 83 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 84 | ProgramResult program = | 84 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |
| 85 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||
| 86 | out += program.first; | 85 | out += program.first; |
| 87 | 86 | ||
| 88 | out += R"( | 87 | out += R"( |
| @@ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 116 | 115 | ||
| 117 | )"; | 116 | )"; |
| 118 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | 117 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); |
| 119 | ProgramResult program = | 118 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |
| 120 | Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||
| 121 | |||
| 122 | out += program.first; | 119 | out += program.first; |
| 123 | 120 | ||
| 124 | out += R"( | 121 | out += R"( |
| @@ -130,4 +127,22 @@ void main() { | |||
| 130 | return {std::move(out), std::move(program.second)}; | 127 | return {std::move(out), std::move(program.second)}; |
| 131 | } | 128 | } |
| 132 | 129 | ||
| 130 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||
| 131 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 132 | |||
| 133 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 134 | out += GetCommonDeclarations(); | ||
| 135 | |||
| 136 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||
| 137 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 138 | out += program.first; | ||
| 139 | |||
| 140 | out += R"( | ||
| 141 | void main() { | ||
| 142 | execute_compute(); | ||
| 143 | } | ||
| 144 | )"; | ||
| 145 | return {std::move(out), std::move(program.second)}; | ||
| 146 | } | ||
| 147 | |||
| 133 | } // namespace OpenGL::GLShader | 148 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 7cbc590f8..3833e88ab 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| 54 | /// Generates the GLSL fragment shader program source code for the given FS program | 54 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); |
| 56 | 56 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||
| 59 | |||
| 57 | } // namespace OpenGL::GLShader | 60 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 5f3fe067e..9e74eda0d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,21 +10,25 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL::GLShader { | 11 | namespace OpenGL::GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadShader(const char* source, GLenum type) { | 13 | namespace { |
| 14 | const char* debug_type; | 14 | const char* GetStageDebugName(GLenum type) { |
| 15 | switch (type) { | 15 | switch (type) { |
| 16 | case GL_VERTEX_SHADER: | 16 | case GL_VERTEX_SHADER: |
| 17 | debug_type = "vertex"; | 17 | return "vertex"; |
| 18 | break; | ||
| 19 | case GL_GEOMETRY_SHADER: | 18 | case GL_GEOMETRY_SHADER: |
| 20 | debug_type = "geometry"; | 19 | return "geometry"; |
| 21 | break; | ||
| 22 | case GL_FRAGMENT_SHADER: | 20 | case GL_FRAGMENT_SHADER: |
| 23 | debug_type = "fragment"; | 21 | return "fragment"; |
| 24 | break; | 22 | case GL_COMPUTE_SHADER: |
| 25 | default: | 23 | return "compute"; |
| 26 | UNREACHABLE(); | ||
| 27 | } | 24 | } |
| 25 | UNIMPLEMENTED(); | ||
| 26 | return "unknown"; | ||
| 27 | } | ||
| 28 | } // Anonymous namespace | ||
| 29 | |||
| 30 | GLuint LoadShader(const char* source, GLenum type) { | ||
| 31 | const char* debug_type = GetStageDebugName(type); | ||
| 28 | const GLuint shader_id = glCreateShader(type); | 32 | const GLuint shader_id = glCreateShader(type); |
| 29 | glShaderSource(shader_id, 1, &source, nullptr); | 33 | glShaderSource(shader_id, 1, &source, nullptr); |
| 30 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | 34 | LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); |