diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 96 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 101 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 106 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 56 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 5 |
12 files changed, 339 insertions, 109 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index d83dca25a..466a911db 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/scope_exit.h" | 15 | #include "common/scope_exit.h" |
| 16 | #include "core/settings.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_device.h" | 17 | #include "video_core/renderer_opengl/gl_device.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | 19 | ||
| @@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 183 | has_precise_bug = TestPreciseBug(); | 184 | has_precise_bug = TestPreciseBug(); |
| 184 | has_broken_compute = is_intel_proprietary; | 185 | has_broken_compute = is_intel_proprietary; |
| 185 | has_fast_buffer_sub_data = is_nvidia; | 186 | has_fast_buffer_sub_data = is_nvidia; |
| 187 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | ||
| 188 | GLAD_GL_NV_compute_program5; | ||
| 186 | 189 | ||
| 187 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 190 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 188 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 191 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
| 189 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 192 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
| 193 | |||
| 194 | if (Settings::values.use_assembly_shaders && !use_assembly_shaders) { | ||
| 195 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | ||
| 196 | } | ||
| 190 | } | 197 | } |
| 191 | 198 | ||
| 192 | Device::Device(std::nullptr_t) { | 199 | Device::Device(std::nullptr_t) { |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a55050cb5..e915dbd86 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -88,6 +88,10 @@ public: | |||
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool UseAssemblyShaders() const { | ||
| 92 | return use_assembly_shaders; | ||
| 93 | } | ||
| 94 | |||
| 91 | private: | 95 | private: |
| 92 | static bool TestVariableAoffi(); | 96 | static bool TestVariableAoffi(); |
| 93 | static bool TestPreciseBug(); | 97 | static bool TestPreciseBug(); |
| @@ -107,6 +111,7 @@ private: | |||
| 107 | bool has_precise_bug{}; | 111 | bool has_precise_bug{}; |
| 108 | bool has_broken_compute{}; | 112 | bool has_broken_compute{}; |
| 109 | bool has_fast_buffer_sub_data{}; | 113 | bool has_fast_buffer_sub_data{}; |
| 114 | bool use_assembly_shaders{}; | ||
| 110 | }; | 115 | }; |
| 111 | 116 | ||
| 112 | } // namespace OpenGL | 117 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 69dcf952f..92ca22136 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) { | |||
| 94 | } // Anonymous namespace | 94 | } // Anonymous namespace |
| 95 | 95 | ||
| 96 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 96 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 97 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 97 | const Device& device, ScreenInfo& info, |
| 98 | StateTracker& state_tracker) | 98 | ProgramManager& program_manager, StateTracker& state_tracker) |
| 99 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | 99 | : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, |
| 100 | state_tracker}, | ||
| 100 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | 101 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 101 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | 102 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 102 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | 103 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | 104 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { |
| 104 | CheckExtensions(); | 105 | CheckExtensions(); |
| 106 | |||
| 107 | if (device.UseAssemblyShaders()) { | ||
| 108 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 109 | for (const GLuint cbuf : staging_cbufs) { | ||
| 110 | glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||
| 111 | nullptr, 0); | ||
| 112 | } | ||
| 113 | } | ||
| 105 | } | 114 | } |
| 106 | 115 | ||
| 107 | RasterizerOpenGL::~RasterizerOpenGL() {} | 116 | RasterizerOpenGL::~RasterizerOpenGL() { |
| 117 | if (device.UseAssemblyShaders()) { | ||
| 118 | glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 119 | } | ||
| 120 | } | ||
| 108 | 121 | ||
| 109 | void RasterizerOpenGL::CheckExtensions() { | 122 | void RasterizerOpenGL::CheckExtensions() { |
| 110 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { | 123 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { |
| @@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 230 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 243 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 231 | MICROPROFILE_SCOPE(OpenGL_Shader); | 244 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 232 | auto& gpu = system.GPU().Maxwell3D(); | 245 | auto& gpu = system.GPU().Maxwell3D(); |
| 246 | std::size_t num_ssbos = 0; | ||
| 233 | u32 clip_distances = 0; | 247 | u32 clip_distances = 0; |
| 234 | 248 | ||
| 235 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 249 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 261 | 275 | ||
| 262 | Shader shader{shader_cache.GetStageProgram(program)}; | 276 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 263 | 277 | ||
| 278 | if (device.UseAssemblyShaders()) { | ||
| 279 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this | ||
| 280 | // all stages share the same bindings. | ||
| 281 | const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); | ||
| 282 | ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); | ||
| 283 | num_ssbos += num_stage_ssbos; | ||
| 284 | } | ||
| 285 | |||
| 264 | // Stage indices are 0 - 5 | 286 | // Stage indices are 0 - 5 |
| 265 | const std::size_t stage = index == 0 ? 0 : index - 1; | 287 | const std::size_t stage = index == 0 ? 0 : index - 1; |
| 266 | SetupDrawConstBuffers(stage, shader); | 288 | SetupDrawConstBuffers(stage, shader); |
| @@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 526 | SyncFramebufferSRGB(); | 548 | SyncFramebufferSRGB(); |
| 527 | 549 | ||
| 528 | buffer_cache.Acquire(); | 550 | buffer_cache.Acquire(); |
| 551 | current_cbuf = 0; | ||
| 529 | 552 | ||
| 530 | std::size_t buffer_size = CalculateVertexArraysSize(); | 553 | std::size_t buffer_size = CalculateVertexArraysSize(); |
| 531 | 554 | ||
| @@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 535 | } | 558 | } |
| 536 | 559 | ||
| 537 | // Uniform space for the 5 shader stages | 560 | // Uniform space for the 5 shader stages |
| 538 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + | 561 | buffer_size = |
| 539 | (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * | 562 | Common::AlignUp<std::size_t>(buffer_size, 4) + |
| 540 | Maxwell::MaxShaderStage; | 563 | (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; |
| 541 | 564 | ||
| 542 | // Add space for at least 18 constant buffers | 565 | // Add space for at least 18 constant buffers |
| 543 | buffer_size += Maxwell::MaxConstBuffers * | 566 | buffer_size += Maxwell::MaxConstBuffers * |
| @@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 558 | } | 581 | } |
| 559 | 582 | ||
| 560 | // Setup emulation uniform buffer. | 583 | // Setup emulation uniform buffer. |
| 561 | GLShader::MaxwellUniformData ubo; | 584 | if (!device.UseAssemblyShaders()) { |
| 562 | ubo.SetFromRegs(gpu); | 585 | MaxwellUniformData ubo; |
| 563 | const auto [buffer, offset] = | 586 | ubo.SetFromRegs(gpu); |
| 564 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 587 | const auto [buffer, offset] = |
| 565 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, | 588 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 566 | static_cast<GLsizeiptr>(sizeof(ubo))); | 589 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, |
| 590 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 591 | } | ||
| 567 | 592 | ||
| 568 | // Setup shaders and their used resources. | 593 | // Setup shaders and their used resources. |
| 569 | texture_cache.GuardSamplers(true); | 594 | texture_cache.GuardSamplers(true); |
| @@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 635 | } | 660 | } |
| 636 | 661 | ||
| 637 | buffer_cache.Acquire(); | 662 | buffer_cache.Acquire(); |
| 663 | current_cbuf = 0; | ||
| 638 | 664 | ||
| 639 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 665 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 640 | SetupComputeTextures(kernel); | 666 | SetupComputeTextures(kernel); |
| 641 | SetupComputeImages(kernel); | 667 | SetupComputeImages(kernel); |
| 642 | program_manager.BindComputeShader(kernel->GetHandle()); | ||
| 643 | 668 | ||
| 644 | const std::size_t buffer_size = | 669 | const std::size_t buffer_size = |
| 645 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 670 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 652 | buffer_cache.Unmap(); | 677 | buffer_cache.Unmap(); |
| 653 | 678 | ||
| 654 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 679 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 680 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 655 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 681 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 656 | ++num_queued_commands; | 682 | ++num_queued_commands; |
| 657 | } | 683 | } |
| @@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 812 | } | 838 | } |
| 813 | 839 | ||
| 814 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { | 840 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { |
| 841 | static constexpr std::array PARAMETER_LUT = { | ||
| 842 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 843 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 844 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; | ||
| 845 | |||
| 815 | MICROPROFILE_SCOPE(OpenGL_UBO); | 846 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 816 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | 847 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 817 | const auto& shader_stage = stages[stage_index]; | 848 | const auto& shader_stage = stages[stage_index]; |
| 818 | 849 | ||
| 819 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 850 | u32 binding = |
| 851 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; | ||
| 820 | for (const auto& entry : shader->GetEntries().const_buffers) { | 852 | for (const auto& entry : shader->GetEntries().const_buffers) { |
| 821 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 853 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 822 | SetupConstBuffer(binding++, buffer, entry); | 854 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); |
| 823 | } | 855 | } |
| 824 | } | 856 | } |
| 825 | 857 | ||
| @@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 835 | buffer.address = config.Address(); | 867 | buffer.address = config.Address(); |
| 836 | buffer.size = config.size; | 868 | buffer.size = config.size; |
| 837 | buffer.enabled = mask[entry.GetIndex()]; | 869 | buffer.enabled = mask[entry.GetIndex()]; |
| 838 | SetupConstBuffer(binding++, buffer, entry); | 870 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); |
| 839 | } | 871 | } |
| 840 | } | 872 | } |
| 841 | 873 | ||
| 842 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 874 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, |
| 875 | const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 843 | const ConstBufferEntry& entry) { | 876 | const ConstBufferEntry& entry) { |
| 844 | if (!buffer.enabled) { | 877 | if (!buffer.enabled) { |
| 845 | // Set values to zero to unbind buffers | 878 | // Set values to zero to unbind buffers |
| 846 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 879 | if (device.UseAssemblyShaders()) { |
| 847 | sizeof(float)); | 880 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); |
| 881 | } else { | ||
| 882 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||
| 883 | buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | ||
| 884 | } | ||
| 848 | return; | 885 | return; |
| 849 | } | 886 | } |
| 850 | 887 | ||
| @@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const | |||
| 853 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | 890 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 854 | 891 | ||
| 855 | const auto alignment = device.GetUniformBufferAlignment(); | 892 | const auto alignment = device.GetUniformBufferAlignment(); |
| 856 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, | 893 | auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, |
| 857 | device.HasFastBufferSubData()); | 894 | device.HasFastBufferSubData()); |
| 858 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 895 | if (!device.UseAssemblyShaders()) { |
| 896 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | ||
| 897 | return; | ||
| 898 | } | ||
| 899 | if (offset != 0) { | ||
| 900 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 901 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | ||
| 902 | cbuf = staging_cbuf; | ||
| 903 | offset = 0; | ||
| 904 | } | ||
| 905 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | ||
| 859 | } | 906 | } |
| 860 | 907 | ||
| 861 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 908 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { |
| @@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 863 | auto& memory_manager{gpu.MemoryManager()}; | 910 | auto& memory_manager{gpu.MemoryManager()}; |
| 864 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 911 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 865 | 912 | ||
| 866 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 913 | u32 binding = |
| 914 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 867 | for (const auto& entry : shader->GetEntries().global_memory_entries) { | 915 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 868 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | 916 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |
| 869 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; | 917 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b94c65907..87f7fe159 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -56,8 +56,8 @@ struct DrawParameters; | |||
| 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 57 | public: | 57 | public: |
| 58 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 58 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 59 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 59 | const Device& device, ScreenInfo& info, |
| 60 | StateTracker& state_tracker); | 60 | ProgramManager& program_manager, StateTracker& state_tracker); |
| 61 | ~RasterizerOpenGL() override; | 61 | ~RasterizerOpenGL() override; |
| 62 | 62 | ||
| 63 | void Draw(bool is_indexed, bool is_instanced) override; | 63 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -106,7 +106,7 @@ private: | |||
| 106 | void SetupComputeConstBuffers(const Shader& kernel); | 106 | void SetupComputeConstBuffers(const Shader& kernel); |
| 107 | 107 | ||
| 108 | /// Configures a constant buffer. | 108 | /// Configures a constant buffer. |
| 109 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 109 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 110 | const ConstBufferEntry& entry); | 110 | const ConstBufferEntry& entry); |
| 111 | 111 | ||
| 112 | /// Configures the current global memory entries to use for the draw command. | 112 | /// Configures the current global memory entries to use for the draw command. |
| @@ -224,7 +224,7 @@ private: | |||
| 224 | 224 | ||
| 225 | void SetupShaders(GLenum primitive_mode); | 225 | void SetupShaders(GLenum primitive_mode); |
| 226 | 226 | ||
| 227 | const Device device; | 227 | const Device& device; |
| 228 | 228 | ||
| 229 | TextureCacheOpenGL texture_cache; | 229 | TextureCacheOpenGL texture_cache; |
| 230 | ShaderCacheOpenGL shader_cache; | 230 | ShaderCacheOpenGL shader_cache; |
| @@ -236,7 +236,7 @@ private: | |||
| 236 | 236 | ||
| 237 | Core::System& system; | 237 | Core::System& system; |
| 238 | ScreenInfo& screen_info; | 238 | ScreenInfo& screen_info; |
| 239 | GLShader::ProgramManager& program_manager; | 239 | ProgramManager& program_manager; |
| 240 | StateTracker& state_tracker; | 240 | StateTracker& state_tracker; |
| 241 | 241 | ||
| 242 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 242 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| @@ -248,6 +248,12 @@ private: | |||
| 248 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | 248 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> |
| 249 | enabled_transform_feedback_buffers; | 249 | enabled_transform_feedback_buffers; |
| 250 | 250 | ||
| 251 | static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||
| 252 | Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 253 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 254 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||
| 255 | std::size_t current_cbuf = 0; | ||
| 256 | |||
| 251 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 257 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 252 | std::size_t num_queued_commands = 0; | 258 | std::size_t num_queued_commands = 0; |
| 253 | 259 | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 97803d480..a787e27d2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -125,6 +125,15 @@ void OGLProgram::Release() { | |||
| 125 | handle = 0; | 125 | handle = 0; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | void OGLAssemblyProgram::Release() { | ||
| 129 | if (handle == 0) { | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 133 | glDeleteProgramsARB(1, &handle); | ||
| 134 | handle = 0; | ||
| 135 | } | ||
| 136 | |||
| 128 | void OGLPipeline::Create() { | 137 | void OGLPipeline::Create() { |
| 129 | if (handle != 0) | 138 | if (handle != 0) |
| 130 | return; | 139 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index de93f4212..f8b322227 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -167,6 +167,22 @@ public: | |||
| 167 | GLuint handle = 0; | 167 | GLuint handle = 0; |
| 168 | }; | 168 | }; |
| 169 | 169 | ||
| 170 | class OGLAssemblyProgram : private NonCopyable { | ||
| 171 | public: | ||
| 172 | OGLAssemblyProgram() = default; | ||
| 173 | |||
| 174 | OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 175 | |||
| 176 | ~OGLAssemblyProgram() { | ||
| 177 | Release(); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Deletes the internal OpenGL resource | ||
| 181 | void Release(); | ||
| 182 | |||
| 183 | GLuint handle = 0; | ||
| 184 | }; | ||
| 185 | |||
| 170 | class OGLPipeline : private NonCopyable { | 186 | class OGLPipeline : private NonCopyable { |
| 171 | public: | 187 | public: |
| 172 | OGLPipeline() = default; | 188 | OGLPipeline() = default; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9759a7078..4cd0f36cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 97 | return {}; | 97 | return {}; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | ||
| 101 | switch (shader_type) { | ||
| 102 | case ShaderType::Vertex: | ||
| 103 | return GL_VERTEX_PROGRAM_NV; | ||
| 104 | case ShaderType::TesselationControl: | ||
| 105 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 106 | case ShaderType::TesselationEval: | ||
| 107 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 108 | case ShaderType::Geometry: | ||
| 109 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 110 | case ShaderType::Fragment: | ||
| 111 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 112 | case ShaderType::Compute: | ||
| 113 | return GL_COMPUTE_PROGRAM_NV; | ||
| 114 | } | ||
| 115 | return {}; | ||
| 116 | } | ||
| 117 | |||
| 100 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | 118 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 101 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 119 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 102 | } | 120 | } |
| @@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | |||
| 120 | return registry; | 138 | return registry; |
| 121 | } | 139 | } |
| 122 | 140 | ||
| 123 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | 141 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, |
| 124 | u64 unique_identifier, const ShaderIR& ir, | 142 | const ShaderIR& ir, const Registry& registry, |
| 125 | const Registry& registry, bool hint_retrievable = false) { | 143 | bool hint_retrievable = false) { |
| 126 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | 144 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 127 | LOG_INFO(Render_OpenGL, "{}", shader_id); | 145 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 128 | 146 | ||
| 129 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | 147 | auto program = std::make_shared<ProgramHandle>(); |
| 130 | OGLShader shader; | 148 | |
| 131 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | 149 | if (device.UseAssemblyShaders()) { |
| 150 | const std::string arb = "Not implemented"; | ||
| 151 | |||
| 152 | GLuint& arb_prog = program->assembly_program.handle; | ||
| 153 | |||
| 154 | // Commented out functions signal OpenGL errors but are compatible with apitrace. | ||
| 155 | // Use them only to capture and replay on apitrace. | ||
| 156 | #if 0 | ||
| 157 | glGenProgramsNV(1, &arb_prog); | ||
| 158 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 159 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 160 | #else | ||
| 161 | glGenProgramsARB(1, &arb_prog); | ||
| 162 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 163 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 164 | #endif | ||
| 165 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 166 | if (err && *err) { | ||
| 167 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 168 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 169 | } | ||
| 170 | } else { | ||
| 171 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 172 | OGLShader shader; | ||
| 173 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 174 | |||
| 175 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 176 | } | ||
| 132 | 177 | ||
| 133 | auto program = std::make_shared<OGLProgram>(); | ||
| 134 | program->Create(true, hint_retrievable, shader.handle); | ||
| 135 | return program; | 178 | return program; |
| 136 | } | 179 | } |
| 137 | 180 | ||
| @@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 153 | 196 | ||
| 154 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 197 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 155 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 198 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 156 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) | 199 | ShaderEntries entries, ProgramSharedPtr program_) |
| 157 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, | 200 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 158 | size_in_bytes{size_in_bytes}, program{std::move(program)} {} | 201 | size_in_bytes{size_in_bytes}, program{std::move(program_)} { |
| 202 | // Assign either the assembly program or source program. We can't have both. | ||
| 203 | handle = program->assembly_program.handle; | ||
| 204 | if (handle == 0) { | ||
| 205 | handle = program->source_program.handle; | ||
| 206 | } | ||
| 207 | ASSERT(handle != 0); | ||
| 208 | } | ||
| 159 | 209 | ||
| 160 | CachedShader::~CachedShader() = default; | 210 | CachedShader::~CachedShader() = default; |
| 161 | 211 | ||
| 162 | GLuint CachedShader::GetHandle() const { | 212 | GLuint CachedShader::GetHandle() const { |
| 163 | DEBUG_ASSERT(registry->IsConsistent()); | 213 | DEBUG_ASSERT(registry->IsConsistent()); |
| 164 | return program->handle; | 214 | return handle; |
| 165 | } | 215 | } |
| 166 | 216 | ||
| 167 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 217 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| @@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 239 | return; | 289 | return; |
| 240 | } | 290 | } |
| 241 | 291 | ||
| 242 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); | 292 | std::vector<ShaderDiskCachePrecompiled> gl_cache; |
| 293 | if (!device.UseAssemblyShaders()) { | ||
| 294 | // Only load precompiled cache when we are not using assembly shaders | ||
| 295 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 296 | } | ||
| 243 | const auto supported_formats = GetSupportedFormats(); | 297 | const auto supported_formats = GetSupportedFormats(); |
| 244 | 298 | ||
| 245 | // Track if precompiled cache was altered during loading to know if we have to | 299 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 278 | auto registry = MakeRegistry(entry); | 332 | auto registry = MakeRegistry(entry); |
| 279 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | 333 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 280 | 334 | ||
| 281 | std::shared_ptr<OGLProgram> program; | 335 | ProgramSharedPtr program; |
| 282 | if (precompiled_entry) { | 336 | if (precompiled_entry) { |
| 283 | // If the shader is precompiled, attempt to load it with | 337 | // If the shader is precompiled, attempt to load it with |
| 284 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | 338 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); |
| @@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 332 | return; | 386 | return; |
| 333 | } | 387 | } |
| 334 | 388 | ||
| 389 | if (device.UseAssemblyShaders()) { | ||
| 390 | // Don't store precompiled binaries for assembly shaders. | ||
| 391 | return; | ||
| 392 | } | ||
| 393 | |||
| 335 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 394 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 336 | // before precompiling them | 395 | // before precompiling them |
| 337 | 396 | ||
| @@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 339 | const u64 id = (*transferable)[i].unique_identifier; | 398 | const u64 id = (*transferable)[i].unique_identifier; |
| 340 | const auto it = find_precompiled(id); | 399 | const auto it = find_precompiled(id); |
| 341 | if (it == gl_cache.end()) { | 400 | if (it == gl_cache.end()) { |
| 342 | const GLuint program = runtime_cache.at(id).program->handle; | 401 | const GLuint program = runtime_cache.at(id).program->source_program.handle; |
| 343 | disk_cache.SavePrecompiled(id, program); | 402 | disk_cache.SavePrecompiled(id, program); |
| 344 | precompiled_cache_altered = true; | 403 | precompiled_cache_altered = true; |
| 345 | } | 404 | } |
| @@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 350 | } | 409 | } |
| 351 | } | 410 | } |
| 352 | 411 | ||
| 353 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | 412 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 354 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 413 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 355 | const std::unordered_set<GLenum>& supported_formats) { | 414 | const std::unordered_set<GLenum>& supported_formats) { |
| 356 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { | 415 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| @@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 358 | return {}; | 417 | return {}; |
| 359 | } | 418 | } |
| 360 | 419 | ||
| 361 | auto program = std::make_shared<OGLProgram>(); | 420 | auto program = std::make_shared<ProgramHandle>(); |
| 362 | program->handle = glCreateProgram(); | 421 | GLuint& handle = program->source_program.handle; |
| 363 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 422 | handle = glCreateProgram(); |
| 364 | glProgramBinary(program->handle, precompiled_entry.binary_format, | 423 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 365 | precompiled_entry.binary.data(), | 424 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), |
| 366 | static_cast<GLsizei>(precompiled_entry.binary.size())); | 425 | static_cast<GLsizei>(precompiled_entry.binary.size())); |
| 367 | 426 | ||
| 368 | GLint link_status; | 427 | GLint link_status; |
| 369 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | 428 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); |
| 370 | if (link_status == GL_FALSE) { | 429 | if (link_status == GL_FALSE) { |
| 371 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | 430 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 372 | return {}; | 431 | return {}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 91690b470..b2ae8d7f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -43,8 +43,14 @@ struct UnspecializedShader; | |||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 45 | 45 | ||
| 46 | struct ProgramHandle { | ||
| 47 | OGLProgram source_program; | ||
| 48 | OGLAssemblyProgram assembly_program; | ||
| 49 | }; | ||
| 50 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 51 | |||
| 46 | struct PrecompiledShader { | 52 | struct PrecompiledShader { |
| 47 | std::shared_ptr<OGLProgram> program; | 53 | ProgramSharedPtr program; |
| 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 54 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 49 | ShaderEntries entries; | 55 | ShaderEntries entries; |
| 50 | }; | 56 | }; |
| @@ -87,12 +93,13 @@ public: | |||
| 87 | private: | 93 | private: |
| 88 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 94 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 89 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 95 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 90 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | 96 | ShaderEntries entries, ProgramSharedPtr program); |
| 91 | 97 | ||
| 92 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 98 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 93 | ShaderEntries entries; | 99 | ShaderEntries entries; |
| 94 | std::size_t size_in_bytes = 0; | 100 | std::size_t size_in_bytes = 0; |
| 95 | std::shared_ptr<OGLProgram> program; | 101 | ProgramSharedPtr program; |
| 102 | GLuint handle = 0; | ||
| 96 | }; | 103 | }; |
| 97 | 104 | ||
| 98 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 105 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -115,7 +122,7 @@ protected: | |||
| 115 | void FlushObjectInner(const Shader& object) override {} | 122 | void FlushObjectInner(const Shader& object) override {} |
| 116 | 123 | ||
| 117 | private: | 124 | private: |
| 118 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( | 125 | ProgramSharedPtr GeneratePrecompiledProgram( |
| 119 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 126 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 120 | const std::unordered_set<GLenum>& supported_formats); | 127 | const std::unordered_set<GLenum>& supported_formats); |
| 121 | 128 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 9c7b0adbd..96605db84 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -6,45 +6,105 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL { |
| 12 | 13 | ||
| 13 | ProgramManager::ProgramManager() = default; | 14 | ProgramManager::ProgramManager(const Device& device) { |
| 15 | use_assembly_programs = device.UseAssemblyShaders(); | ||
| 16 | if (use_assembly_programs) { | ||
| 17 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 18 | } else { | ||
| 19 | graphics_pipeline.Create(); | ||
| 20 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 21 | } | ||
| 22 | } | ||
| 14 | 23 | ||
| 15 | ProgramManager::~ProgramManager() = default; | 24 | ProgramManager::~ProgramManager() = default; |
| 16 | 25 | ||
| 17 | void ProgramManager::Create() { | 26 | void ProgramManager::BindCompute(GLuint program) { |
| 18 | graphics_pipeline.Create(); | 27 | if (use_assembly_programs) { |
| 19 | glBindProgramPipeline(graphics_pipeline.handle); | 28 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); |
| 29 | } else { | ||
| 30 | is_graphics_bound = false; | ||
| 31 | glUseProgram(program); | ||
| 32 | } | ||
| 20 | } | 33 | } |
| 21 | 34 | ||
| 22 | void ProgramManager::BindGraphicsPipeline() { | 35 | void ProgramManager::BindGraphicsPipeline() { |
| 23 | if (!is_graphics_bound) { | 36 | if (use_assembly_programs) { |
| 24 | is_graphics_bound = true; | 37 | UpdateAssemblyPrograms(); |
| 25 | glUseProgram(0); | 38 | } else { |
| 39 | UpdateSourcePrograms(); | ||
| 26 | } | 40 | } |
| 41 | } | ||
| 27 | 42 | ||
| 28 | // Avoid updating the pipeline when values have no changed | 43 | void ProgramManager::BindHostPipeline(GLuint pipeline) { |
| 29 | if (old_state == current_state) { | 44 | if (use_assembly_programs) { |
| 30 | return; | 45 | if (geometry_enabled) { |
| 46 | geometry_enabled = false; | ||
| 47 | old_state.geometry = 0; | ||
| 48 | glDisable(GL_GEOMETRY_PROGRAM_NV); | ||
| 49 | } | ||
| 31 | } | 50 | } |
| 51 | glBindProgramPipeline(pipeline); | ||
| 52 | } | ||
| 32 | 53 | ||
| 33 | // Workaround for AMD bug | 54 | void ProgramManager::RestoreGuestPipeline() { |
| 34 | static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | | 55 | if (use_assembly_programs) { |
| 35 | GL_FRAGMENT_SHADER_BIT}; | 56 | glBindProgramPipeline(0); |
| 36 | const GLuint handle = graphics_pipeline.handle; | 57 | } else { |
| 37 | glUseProgramStages(handle, all_used_stages, 0); | 58 | glBindProgramPipeline(graphics_pipeline.handle); |
| 38 | glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); | 59 | } |
| 39 | glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); | 60 | } |
| 40 | glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); | 61 | |
| 62 | void ProgramManager::UpdateAssemblyPrograms() { | ||
| 63 | const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { | ||
| 64 | if (current == old) { | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | if (current == 0) { | ||
| 68 | if (enabled) { | ||
| 69 | enabled = false; | ||
| 70 | glDisable(stage); | ||
| 71 | } | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | if (!enabled) { | ||
| 75 | enabled = true; | ||
| 76 | glEnable(stage); | ||
| 77 | } | ||
| 78 | glBindProgramARB(stage, current); | ||
| 79 | }; | ||
| 80 | |||
| 81 | update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); | ||
| 82 | update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, | ||
| 83 | old_state.geometry); | ||
| 84 | update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, | ||
| 85 | old_state.fragment); | ||
| 41 | 86 | ||
| 42 | old_state = current_state; | 87 | old_state = current_state; |
| 43 | } | 88 | } |
| 44 | 89 | ||
| 45 | void ProgramManager::BindComputeShader(GLuint program) { | 90 | void ProgramManager::UpdateSourcePrograms() { |
| 46 | is_graphics_bound = false; | 91 | if (!is_graphics_bound) { |
| 47 | glUseProgram(program); | 92 | is_graphics_bound = true; |
| 93 | glUseProgram(0); | ||
| 94 | } | ||
| 95 | |||
| 96 | const GLuint handle = graphics_pipeline.handle; | ||
| 97 | const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||
| 98 | if (current == old) { | ||
| 99 | return; | ||
| 100 | } | ||
| 101 | glUseProgramStages(handle, stage, current); | ||
| 102 | }; | ||
| 103 | update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||
| 104 | update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||
| 105 | update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||
| 106 | |||
| 107 | old_state = current_state; | ||
| 48 | } | 108 | } |
| 49 | 109 | ||
| 50 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | 110 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { |
| @@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | |||
| 54 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | 114 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; |
| 55 | } | 115 | } |
| 56 | 116 | ||
| 57 | } // namespace OpenGL::GLShader | 117 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index d2e47f2a9..0f03b4f12 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -11,7 +11,9 @@ | |||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 13 | 13 | ||
| 14 | namespace OpenGL::GLShader { | 14 | namespace OpenGL { |
| 15 | |||
| 16 | class Device; | ||
| 15 | 17 | ||
| 16 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | 18 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned |
| 17 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | 19 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at |
| @@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384, | |||
| 28 | 30 | ||
| 29 | class ProgramManager { | 31 | class ProgramManager { |
| 30 | public: | 32 | public: |
| 31 | explicit ProgramManager(); | 33 | explicit ProgramManager(const Device& device); |
| 32 | ~ProgramManager(); | 34 | ~ProgramManager(); |
| 33 | 35 | ||
| 34 | void Create(); | 36 | /// Binds a compute program |
| 37 | void BindCompute(GLuint program); | ||
| 35 | 38 | ||
| 36 | /// Updates the graphics pipeline and binds it. | 39 | /// Updates bound programs. |
| 37 | void BindGraphicsPipeline(); | 40 | void BindGraphicsPipeline(); |
| 38 | 41 | ||
| 39 | /// Binds a compute shader. | 42 | /// Binds an OpenGL pipeline object unsynchronized with the guest state. |
| 40 | void BindComputeShader(GLuint program); | 43 | void BindHostPipeline(GLuint pipeline); |
| 44 | |||
| 45 | /// Rewinds BindHostPipeline state changes. | ||
| 46 | void RestoreGuestPipeline(); | ||
| 41 | 47 | ||
| 42 | void UseVertexShader(GLuint program) { | 48 | void UseVertexShader(GLuint program) { |
| 43 | current_state.vertex_shader = program; | 49 | current_state.vertex = program; |
| 44 | } | 50 | } |
| 45 | 51 | ||
| 46 | void UseGeometryShader(GLuint program) { | 52 | void UseGeometryShader(GLuint program) { |
| 47 | current_state.geometry_shader = program; | 53 | current_state.geometry = program; |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | void UseFragmentShader(GLuint program) { | 56 | void UseFragmentShader(GLuint program) { |
| 51 | current_state.fragment_shader = program; | 57 | current_state.fragment = program; |
| 52 | } | 58 | } |
| 53 | 59 | ||
| 54 | private: | 60 | private: |
| 55 | struct PipelineState { | 61 | struct PipelineState { |
| 56 | bool operator==(const PipelineState& rhs) const noexcept { | 62 | GLuint vertex = 0; |
| 57 | return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && | 63 | GLuint geometry = 0; |
| 58 | geometry_shader == rhs.geometry_shader; | 64 | GLuint fragment = 0; |
| 59 | } | ||
| 60 | |||
| 61 | bool operator!=(const PipelineState& rhs) const noexcept { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | GLuint vertex_shader = 0; | ||
| 66 | GLuint fragment_shader = 0; | ||
| 67 | GLuint geometry_shader = 0; | ||
| 68 | }; | 65 | }; |
| 69 | 66 | ||
| 67 | /// Update NV_gpu_program5 programs. | ||
| 68 | void UpdateAssemblyPrograms(); | ||
| 69 | |||
| 70 | /// Update GLSL programs. | ||
| 71 | void UpdateSourcePrograms(); | ||
| 72 | |||
| 70 | OGLPipeline graphics_pipeline; | 73 | OGLPipeline graphics_pipeline; |
| 71 | OGLPipeline compute_pipeline; | 74 | |
| 72 | PipelineState current_state; | 75 | PipelineState current_state; |
| 73 | PipelineState old_state; | 76 | PipelineState old_state; |
| 77 | |||
| 78 | bool use_assembly_programs = false; | ||
| 79 | |||
| 74 | bool is_graphics_bound = true; | 80 | bool is_graphics_bound = true; |
| 81 | |||
| 82 | bool vertex_enabled = false; | ||
| 83 | bool geometry_enabled = false; | ||
| 84 | bool fragment_enabled = false; | ||
| 75 | }; | 85 | }; |
| 76 | 86 | ||
| 77 | } // namespace OpenGL::GLShader | 87 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b2a179746..6b489e6db 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -316,7 +316,7 @@ public: | |||
| 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, | 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, |
| 317 | Core::Frontend::GraphicsContext& context) | 317 | Core::Frontend::GraphicsContext& context) |
| 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, | 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, |
| 319 | has_debug_tool{HasDebugTool()} {} | 319 | program_manager{device}, has_debug_tool{HasDebugTool()} {} |
| 320 | 320 | ||
| 321 | RendererOpenGL::~RendererOpenGL() = default; | 321 | RendererOpenGL::~RendererOpenGL() = default; |
| 322 | 322 | ||
| @@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 468 | vertex_program.Create(true, false, vertex_shader.handle); | 468 | vertex_program.Create(true, false, vertex_shader.handle); |
| 469 | fragment_program.Create(true, false, fragment_shader.handle); | 469 | fragment_program.Create(true, false, fragment_shader.handle); |
| 470 | 470 | ||
| 471 | // Create program pipeline | 471 | pipeline.Create(); |
| 472 | program_manager.Create(); | 472 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); |
| 473 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||
| 473 | 474 | ||
| 474 | // Generate VBO handle for drawing | 475 | // Generate VBO handle for drawing |
| 475 | vertex_buffer.Create(); | 476 | vertex_buffer.Create(); |
| @@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 508 | if (rasterizer) { | 509 | if (rasterizer) { |
| 509 | return; | 510 | return; |
| 510 | } | 511 | } |
| 511 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, | 512 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, |
| 512 | program_manager, state_tracker); | 513 | program_manager, state_tracker); |
| 513 | } | 514 | } |
| 514 | 515 | ||
| @@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 620 | state_tracker.NotifyClipControl(); | 621 | state_tracker.NotifyClipControl(); |
| 621 | state_tracker.NotifyAlphaTest(); | 622 | state_tracker.NotifyAlphaTest(); |
| 622 | 623 | ||
| 623 | program_manager.UseVertexShader(vertex_program.handle); | 624 | program_manager.BindHostPipeline(pipeline.handle); |
| 624 | program_manager.UseGeometryShader(0); | ||
| 625 | program_manager.UseFragmentShader(fragment_program.handle); | ||
| 626 | program_manager.BindGraphicsPipeline(); | ||
| 627 | 625 | ||
| 628 | glEnable(GL_CULL_FACE); | 626 | glEnable(GL_CULL_FACE); |
| 629 | if (screen_info.display_srgb) { | 627 | if (screen_info.display_srgb) { |
| @@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 665 | 663 | ||
| 666 | glClear(GL_COLOR_BUFFER_BIT); | 664 | glClear(GL_COLOR_BUFFER_BIT); |
| 667 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 665 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 666 | |||
| 667 | program_manager.RestoreGuestPipeline(); | ||
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { | 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 50b647661..61bf507f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 11 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -95,6 +96,7 @@ private: | |||
| 95 | Core::Frontend::EmuWindow& emu_window; | 96 | Core::Frontend::EmuWindow& emu_window; |
| 96 | Core::System& system; | 97 | Core::System& system; |
| 97 | Core::Frontend::GraphicsContext& context; | 98 | Core::Frontend::GraphicsContext& context; |
| 99 | const Device device; | ||
| 98 | 100 | ||
| 99 | StateTracker state_tracker{system}; | 101 | StateTracker state_tracker{system}; |
| 100 | 102 | ||
| @@ -102,13 +104,14 @@ private: | |||
| 102 | OGLBuffer vertex_buffer; | 104 | OGLBuffer vertex_buffer; |
| 103 | OGLProgram vertex_program; | 105 | OGLProgram vertex_program; |
| 104 | OGLProgram fragment_program; | 106 | OGLProgram fragment_program; |
| 107 | OGLPipeline pipeline; | ||
| 105 | OGLFramebuffer screenshot_framebuffer; | 108 | OGLFramebuffer screenshot_framebuffer; |
| 106 | 109 | ||
| 107 | /// Display information for Switch screen | 110 | /// Display information for Switch screen |
| 108 | ScreenInfo screen_info; | 111 | ScreenInfo screen_info; |
| 109 | 112 | ||
| 110 | /// Global dummy shader pipeline | 113 | /// Global dummy shader pipeline |
| 111 | GLShader::ProgramManager program_manager; | 114 | ProgramManager program_manager; |
| 112 | 115 | ||
| 113 | /// OpenGL framebuffer data | 116 | /// OpenGL framebuffer data |
| 114 | std::vector<u8> gl_framebuffer_data; | 117 | std::vector<u8> gl_framebuffer_data; |