summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp96
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h16
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp106
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h56
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp16
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
12 files changed, 339 insertions, 109 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d83dca25a..466a911db 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -13,6 +13,7 @@
13 13
14#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "common/scope_exit.h" 15#include "common/scope_exit.h"
16#include "core/settings.h"
16#include "video_core/renderer_opengl/gl_device.h" 17#include "video_core/renderer_opengl/gl_device.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_resource_manager.h"
18 19
@@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
183 has_precise_bug = TestPreciseBug(); 184 has_precise_bug = TestPreciseBug();
184 has_broken_compute = is_intel_proprietary; 185 has_broken_compute = is_intel_proprietary;
185 has_fast_buffer_sub_data = is_nvidia; 186 has_fast_buffer_sub_data = is_nvidia;
187 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
188 GLAD_GL_NV_compute_program5;
186 189
187 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 190 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
188 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); 191 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
189 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); 192 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
193
194 if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
195 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
196 }
190} 197}
191 198
192Device::Device(std::nullptr_t) { 199Device::Device(std::nullptr_t) {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index a55050cb5..e915dbd86 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -88,6 +88,10 @@ public:
88 return has_fast_buffer_sub_data; 88 return has_fast_buffer_sub_data;
89 } 89 }
90 90
91 bool UseAssemblyShaders() const {
92 return use_assembly_shaders;
93 }
94
91private: 95private:
92 static bool TestVariableAoffi(); 96 static bool TestVariableAoffi();
93 static bool TestPreciseBug(); 97 static bool TestPreciseBug();
@@ -107,6 +111,7 @@ private:
107 bool has_precise_bug{}; 111 bool has_precise_bug{};
108 bool has_broken_compute{}; 112 bool has_broken_compute{};
109 bool has_fast_buffer_sub_data{}; 113 bool has_fast_buffer_sub_data{};
114 bool use_assembly_shaders{};
110}; 115};
111 116
112} // namespace OpenGL 117} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 69dcf952f..92ca22136 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) {
94} // Anonymous namespace 94} // Anonymous namespace
95 95
96RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 96RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
97 ScreenInfo& info, GLShader::ProgramManager& program_manager, 97 const Device& device, ScreenInfo& info,
98 StateTracker& state_tracker) 98 ProgramManager& program_manager, StateTracker& state_tracker)
99 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, 99 : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
100 state_tracker},
100 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, 101 shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
101 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, 102 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
102 fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, 103 fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
103 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { 104 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
104 CheckExtensions(); 105 CheckExtensions();
106
107 if (device.UseAssemblyShaders()) {
108 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
109 for (const GLuint cbuf : staging_cbufs) {
110 glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
111 nullptr, 0);
112 }
113 }
105} 114}
106 115
107RasterizerOpenGL::~RasterizerOpenGL() {} 116RasterizerOpenGL::~RasterizerOpenGL() {
117 if (device.UseAssemblyShaders()) {
118 glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
119 }
120}
108 121
109void RasterizerOpenGL::CheckExtensions() { 122void RasterizerOpenGL::CheckExtensions() {
110 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { 123 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
230void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 243void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
231 MICROPROFILE_SCOPE(OpenGL_Shader); 244 MICROPROFILE_SCOPE(OpenGL_Shader);
232 auto& gpu = system.GPU().Maxwell3D(); 245 auto& gpu = system.GPU().Maxwell3D();
246 std::size_t num_ssbos = 0;
233 u32 clip_distances = 0; 247 u32 clip_distances = 0;
234 248
235 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 249 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
261 275
262 Shader shader{shader_cache.GetStageProgram(program)}; 276 Shader shader{shader_cache.GetStageProgram(program)};
263 277
278 if (device.UseAssemblyShaders()) {
279 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
280 // all stages share the same bindings.
281 const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
282 ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
283 num_ssbos += num_stage_ssbos;
284 }
285
264 // Stage indices are 0 - 5 286 // Stage indices are 0 - 5
265 const std::size_t stage = index == 0 ? 0 : index - 1; 287 const std::size_t stage = index == 0 ? 0 : index - 1;
266 SetupDrawConstBuffers(stage, shader); 288 SetupDrawConstBuffers(stage, shader);
@@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
526 SyncFramebufferSRGB(); 548 SyncFramebufferSRGB();
527 549
528 buffer_cache.Acquire(); 550 buffer_cache.Acquire();
551 current_cbuf = 0;
529 552
530 std::size_t buffer_size = CalculateVertexArraysSize(); 553 std::size_t buffer_size = CalculateVertexArraysSize();
531 554
@@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
535 } 558 }
536 559
537 // Uniform space for the 5 shader stages 560 // Uniform space for the 5 shader stages
538 buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + 561 buffer_size =
539 (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * 562 Common::AlignUp<std::size_t>(buffer_size, 4) +
540 Maxwell::MaxShaderStage; 563 (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
541 564
542 // Add space for at least 18 constant buffers 565 // Add space for at least 18 constant buffers
543 buffer_size += Maxwell::MaxConstBuffers * 566 buffer_size += Maxwell::MaxConstBuffers *
@@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
558 } 581 }
559 582
560 // Setup emulation uniform buffer. 583 // Setup emulation uniform buffer.
561 GLShader::MaxwellUniformData ubo; 584 if (!device.UseAssemblyShaders()) {
562 ubo.SetFromRegs(gpu); 585 MaxwellUniformData ubo;
563 const auto [buffer, offset] = 586 ubo.SetFromRegs(gpu);
564 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 587 const auto [buffer, offset] =
565 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 588 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
566 static_cast<GLsizeiptr>(sizeof(ubo))); 589 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
590 static_cast<GLsizeiptr>(sizeof(ubo)));
591 }
567 592
568 // Setup shaders and their used resources. 593 // Setup shaders and their used resources.
569 texture_cache.GuardSamplers(true); 594 texture_cache.GuardSamplers(true);
@@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
635 } 660 }
636 661
637 buffer_cache.Acquire(); 662 buffer_cache.Acquire();
663 current_cbuf = 0;
638 664
639 auto kernel = shader_cache.GetComputeKernel(code_addr); 665 auto kernel = shader_cache.GetComputeKernel(code_addr);
640 SetupComputeTextures(kernel); 666 SetupComputeTextures(kernel);
641 SetupComputeImages(kernel); 667 SetupComputeImages(kernel);
642 program_manager.BindComputeShader(kernel->GetHandle());
643 668
644 const std::size_t buffer_size = 669 const std::size_t buffer_size =
645 Tegra::Engines::KeplerCompute::NumConstBuffers * 670 Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
652 buffer_cache.Unmap(); 677 buffer_cache.Unmap();
653 678
654 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 679 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
680 program_manager.BindCompute(kernel->GetHandle());
655 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 681 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
656 ++num_queued_commands; 682 ++num_queued_commands;
657} 683}
@@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
812} 838}
813 839
814void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { 840void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
841 static constexpr std::array PARAMETER_LUT = {
842 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
843 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
844 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
845
815 MICROPROFILE_SCOPE(OpenGL_UBO); 846 MICROPROFILE_SCOPE(OpenGL_UBO);
816 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 847 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
817 const auto& shader_stage = stages[stage_index]; 848 const auto& shader_stage = stages[stage_index];
818 849
819 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; 850 u32 binding =
851 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
820 for (const auto& entry : shader->GetEntries().const_buffers) { 852 for (const auto& entry : shader->GetEntries().const_buffers) {
821 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 853 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
822 SetupConstBuffer(binding++, buffer, entry); 854 SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
823 } 855 }
824} 856}
825 857
@@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
835 buffer.address = config.Address(); 867 buffer.address = config.Address();
836 buffer.size = config.size; 868 buffer.size = config.size;
837 buffer.enabled = mask[entry.GetIndex()]; 869 buffer.enabled = mask[entry.GetIndex()];
838 SetupConstBuffer(binding++, buffer, entry); 870 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
839 } 871 }
840} 872}
841 873
842void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 874void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
875 const Tegra::Engines::ConstBufferInfo& buffer,
843 const ConstBufferEntry& entry) { 876 const ConstBufferEntry& entry) {
844 if (!buffer.enabled) { 877 if (!buffer.enabled) {
845 // Set values to zero to unbind buffers 878 // Set values to zero to unbind buffers
846 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, 879 if (device.UseAssemblyShaders()) {
847 sizeof(float)); 880 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
881 } else {
882 glBindBufferRange(GL_UNIFORM_BUFFER, binding,
883 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
884 }
848 return; 885 return;
849 } 886 }
850 887
@@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
853 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 890 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
854 891
855 const auto alignment = device.GetUniformBufferAlignment(); 892 const auto alignment = device.GetUniformBufferAlignment();
856 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 893 auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
857 device.HasFastBufferSubData()); 894 device.HasFastBufferSubData());
858 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 895 if (!device.UseAssemblyShaders()) {
896 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
897 return;
898 }
899 if (offset != 0) {
900 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
901 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
902 cbuf = staging_cbuf;
903 offset = 0;
904 }
905 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
859} 906}
860 907
861void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 908void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
863 auto& memory_manager{gpu.MemoryManager()}; 910 auto& memory_manager{gpu.MemoryManager()};
864 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 911 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
865 912
866 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; 913 u32 binding =
914 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
867 for (const auto& entry : shader->GetEntries().global_memory_entries) { 915 for (const auto& entry : shader->GetEntries().global_memory_entries) {
868 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; 916 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
869 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; 917 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b94c65907..87f7fe159 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,8 +56,8 @@ struct DrawParameters;
56class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 56class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
57public: 57public:
58 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 58 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
59 ScreenInfo& info, GLShader::ProgramManager& program_manager, 59 const Device& device, ScreenInfo& info,
60 StateTracker& state_tracker); 60 ProgramManager& program_manager, StateTracker& state_tracker);
61 ~RasterizerOpenGL() override; 61 ~RasterizerOpenGL() override;
62 62
63 void Draw(bool is_indexed, bool is_instanced) override; 63 void Draw(bool is_indexed, bool is_instanced) override;
@@ -106,7 +106,7 @@ private:
106 void SetupComputeConstBuffers(const Shader& kernel); 106 void SetupComputeConstBuffers(const Shader& kernel);
107 107
108 /// Configures a constant buffer. 108 /// Configures a constant buffer.
109 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
110 const ConstBufferEntry& entry); 110 const ConstBufferEntry& entry);
111 111
112 /// Configures the current global memory entries to use for the draw command. 112 /// Configures the current global memory entries to use for the draw command.
@@ -224,7 +224,7 @@ private:
224 224
225 void SetupShaders(GLenum primitive_mode); 225 void SetupShaders(GLenum primitive_mode);
226 226
227 const Device device; 227 const Device& device;
228 228
229 TextureCacheOpenGL texture_cache; 229 TextureCacheOpenGL texture_cache;
230 ShaderCacheOpenGL shader_cache; 230 ShaderCacheOpenGL shader_cache;
@@ -236,7 +236,7 @@ private:
236 236
237 Core::System& system; 237 Core::System& system;
238 ScreenInfo& screen_info; 238 ScreenInfo& screen_info;
239 GLShader::ProgramManager& program_manager; 239 ProgramManager& program_manager;
240 StateTracker& state_tracker; 240 StateTracker& state_tracker;
241 241
242 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 242 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@@ -248,6 +248,12 @@ private:
248 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> 248 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
249 enabled_transform_feedback_buffers; 249 enabled_transform_feedback_buffers;
250 250
251 static constexpr std::size_t NUM_CONSTANT_BUFFERS =
252 Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
253 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
254 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
255 std::size_t current_cbuf = 0;
256
251 /// Number of commands queued to the OpenGL driver. Reseted on flush. 257 /// Number of commands queued to the OpenGL driver. Reseted on flush.
252 std::size_t num_queued_commands = 0; 258 std::size_t num_queued_commands = 0;
253 259
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 97803d480..a787e27d2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -125,6 +125,15 @@ void OGLProgram::Release() {
125 handle = 0; 125 handle = 0;
126} 126}
127 127
128void OGLAssemblyProgram::Release() {
129 if (handle == 0) {
130 return;
131 }
132 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
133 glDeleteProgramsARB(1, &handle);
134 handle = 0;
135}
136
128void OGLPipeline::Create() { 137void OGLPipeline::Create() {
129 if (handle != 0) 138 if (handle != 0)
130 return; 139 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index de93f4212..f8b322227 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -167,6 +167,22 @@ public:
167 GLuint handle = 0; 167 GLuint handle = 0;
168}; 168};
169 169
170class OGLAssemblyProgram : private NonCopyable {
171public:
172 OGLAssemblyProgram() = default;
173
174 OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
175
176 ~OGLAssemblyProgram() {
177 Release();
178 }
179
180 /// Deletes the internal OpenGL resource
181 void Release();
182
183 GLuint handle = 0;
184};
185
170class OGLPipeline : private NonCopyable { 186class OGLPipeline : private NonCopyable {
171public: 187public:
172 OGLPipeline() = default; 188 OGLPipeline() = default;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 9759a7078..4cd0f36cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
97 return {}; 97 return {};
98} 98}
99 99
100constexpr GLenum AssemblyEnum(ShaderType shader_type) {
101 switch (shader_type) {
102 case ShaderType::Vertex:
103 return GL_VERTEX_PROGRAM_NV;
104 case ShaderType::TesselationControl:
105 return GL_TESS_CONTROL_PROGRAM_NV;
106 case ShaderType::TesselationEval:
107 return GL_TESS_EVALUATION_PROGRAM_NV;
108 case ShaderType::Geometry:
109 return GL_GEOMETRY_PROGRAM_NV;
110 case ShaderType::Fragment:
111 return GL_FRAGMENT_PROGRAM_NV;
112 case ShaderType::Compute:
113 return GL_COMPUTE_PROGRAM_NV;
114 }
115 return {};
116}
117
100std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { 118std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
101 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); 119 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
102} 120}
@@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
120 return registry; 138 return registry;
121} 139}
122 140
123std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, 141ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
124 u64 unique_identifier, const ShaderIR& ir, 142 const ShaderIR& ir, const Registry& registry,
125 const Registry& registry, bool hint_retrievable = false) { 143 bool hint_retrievable = false) {
126 const std::string shader_id = MakeShaderID(unique_identifier, shader_type); 144 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
127 LOG_INFO(Render_OpenGL, "{}", shader_id); 145 LOG_INFO(Render_OpenGL, "{}", shader_id);
128 146
129 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); 147 auto program = std::make_shared<ProgramHandle>();
130 OGLShader shader; 148
131 shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); 149 if (device.UseAssemblyShaders()) {
150 const std::string arb = "Not implemented";
151
152 GLuint& arb_prog = program->assembly_program.handle;
153
154// Commented out functions signal OpenGL errors but are compatible with apitrace.
155// Use them only to capture and replay on apitrace.
156#if 0
157 glGenProgramsNV(1, &arb_prog);
158 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
159 reinterpret_cast<const GLubyte*>(arb.data()));
160#else
161 glGenProgramsARB(1, &arb_prog);
162 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
163 static_cast<GLsizei>(arb.size()), arb.data());
164#endif
165 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
166 if (err && *err) {
167 LOG_CRITICAL(Render_OpenGL, "{}", err);
168 LOG_INFO(Render_OpenGL, "\n{}", arb);
169 }
170 } else {
171 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
172 OGLShader shader;
173 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
174
175 program->source_program.Create(true, hint_retrievable, shader.handle);
176 }
132 177
133 auto program = std::make_shared<OGLProgram>();
134 program->Create(true, hint_retrievable, shader.handle);
135 return program; 178 return program;
136} 179}
137 180
@@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() {
153 196
154CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 197CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
155 std::shared_ptr<VideoCommon::Shader::Registry> registry, 198 std::shared_ptr<VideoCommon::Shader::Registry> registry,
156 ShaderEntries entries, std::shared_ptr<OGLProgram> program) 199 ShaderEntries entries, ProgramSharedPtr program_)
157 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, 200 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
158 size_in_bytes{size_in_bytes}, program{std::move(program)} {} 201 size_in_bytes{size_in_bytes}, program{std::move(program_)} {
202 // Assign either the assembly program or source program. We can't have both.
203 handle = program->assembly_program.handle;
204 if (handle == 0) {
205 handle = program->source_program.handle;
206 }
207 ASSERT(handle != 0);
208}
159 209
160CachedShader::~CachedShader() = default; 210CachedShader::~CachedShader() = default;
161 211
162GLuint CachedShader::GetHandle() const { 212GLuint CachedShader::GetHandle() const {
163 DEBUG_ASSERT(registry->IsConsistent()); 213 DEBUG_ASSERT(registry->IsConsistent());
164 return program->handle; 214 return handle;
165} 215}
166 216
167Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 217Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
@@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
239 return; 289 return;
240 } 290 }
241 291
242 const std::vector gl_cache = disk_cache.LoadPrecompiled(); 292 std::vector<ShaderDiskCachePrecompiled> gl_cache;
293 if (!device.UseAssemblyShaders()) {
294 // Only load precompiled cache when we are not using assembly shaders
295 gl_cache = disk_cache.LoadPrecompiled();
296 }
243 const auto supported_formats = GetSupportedFormats(); 297 const auto supported_formats = GetSupportedFormats();
244 298
245 // Track if precompiled cache was altered during loading to know if we have to 299 // Track if precompiled cache was altered during loading to know if we have to
@@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
278 auto registry = MakeRegistry(entry); 332 auto registry = MakeRegistry(entry);
279 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); 333 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
280 334
281 std::shared_ptr<OGLProgram> program; 335 ProgramSharedPtr program;
282 if (precompiled_entry) { 336 if (precompiled_entry) {
283 // If the shader is precompiled, attempt to load it with 337 // If the shader is precompiled, attempt to load it with
284 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); 338 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
332 return; 386 return;
333 } 387 }
334 388
389 if (device.UseAssemblyShaders()) {
390 // Don't store precompiled binaries for assembly shaders.
391 return;
392 }
393
335 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 394 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
336 // before precompiling them 395 // before precompiling them
337 396
@@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
339 const u64 id = (*transferable)[i].unique_identifier; 398 const u64 id = (*transferable)[i].unique_identifier;
340 const auto it = find_precompiled(id); 399 const auto it = find_precompiled(id);
341 if (it == gl_cache.end()) { 400 if (it == gl_cache.end()) {
342 const GLuint program = runtime_cache.at(id).program->handle; 401 const GLuint program = runtime_cache.at(id).program->source_program.handle;
343 disk_cache.SavePrecompiled(id, program); 402 disk_cache.SavePrecompiled(id, program);
344 precompiled_cache_altered = true; 403 precompiled_cache_altered = true;
345 } 404 }
@@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
350 } 409 }
351} 410}
352 411
353std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( 412ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
354 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 413 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
355 const std::unordered_set<GLenum>& supported_formats) { 414 const std::unordered_set<GLenum>& supported_formats) {
356 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { 415 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
358 return {}; 417 return {};
359 } 418 }
360 419
361 auto program = std::make_shared<OGLProgram>(); 420 auto program = std::make_shared<ProgramHandle>();
362 program->handle = glCreateProgram(); 421 GLuint& handle = program->source_program.handle;
363 glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); 422 handle = glCreateProgram();
364 glProgramBinary(program->handle, precompiled_entry.binary_format, 423 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
365 precompiled_entry.binary.data(), 424 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
366 static_cast<GLsizei>(precompiled_entry.binary.size())); 425 static_cast<GLsizei>(precompiled_entry.binary.size()));
367 426
368 GLint link_status; 427 GLint link_status;
369 glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); 428 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
370 if (link_status == GL_FALSE) { 429 if (link_status == GL_FALSE) {
371 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); 430 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
372 return {}; 431 return {};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 91690b470..b2ae8d7f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -43,8 +43,14 @@ struct UnspecializedShader;
43using Shader = std::shared_ptr<CachedShader>; 43using Shader = std::shared_ptr<CachedShader>;
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 45
46struct ProgramHandle {
47 OGLProgram source_program;
48 OGLAssemblyProgram assembly_program;
49};
50using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
51
46struct PrecompiledShader { 52struct PrecompiledShader {
47 std::shared_ptr<OGLProgram> program; 53 ProgramSharedPtr program;
48 std::shared_ptr<VideoCommon::Shader::Registry> registry; 54 std::shared_ptr<VideoCommon::Shader::Registry> registry;
49 ShaderEntries entries; 55 ShaderEntries entries;
50}; 56};
@@ -87,12 +93,13 @@ public:
87private: 93private:
88 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 94 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
89 std::shared_ptr<VideoCommon::Shader::Registry> registry, 95 std::shared_ptr<VideoCommon::Shader::Registry> registry,
90 ShaderEntries entries, std::shared_ptr<OGLProgram> program); 96 ShaderEntries entries, ProgramSharedPtr program);
91 97
92 std::shared_ptr<VideoCommon::Shader::Registry> registry; 98 std::shared_ptr<VideoCommon::Shader::Registry> registry;
93 ShaderEntries entries; 99 ShaderEntries entries;
94 std::size_t size_in_bytes = 0; 100 std::size_t size_in_bytes = 0;
95 std::shared_ptr<OGLProgram> program; 101 ProgramSharedPtr program;
102 GLuint handle = 0;
96}; 103};
97 104
98class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 105class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -115,7 +122,7 @@ protected:
115 void FlushObjectInner(const Shader& object) override {} 122 void FlushObjectInner(const Shader& object) override {}
116 123
117private: 124private:
118 std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( 125 ProgramSharedPtr GeneratePrecompiledProgram(
119 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 126 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
120 const std::unordered_set<GLenum>& supported_formats); 127 const std::unordered_set<GLenum>& supported_formats);
121 128
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 9c7b0adbd..96605db84 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -6,45 +6,105 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_device.h"
9#include "video_core/renderer_opengl/gl_shader_manager.h" 10#include "video_core/renderer_opengl/gl_shader_manager.h"
10 11
11namespace OpenGL::GLShader { 12namespace OpenGL {
12 13
13ProgramManager::ProgramManager() = default; 14ProgramManager::ProgramManager(const Device& device) {
15 use_assembly_programs = device.UseAssemblyShaders();
16 if (use_assembly_programs) {
17 glEnable(GL_COMPUTE_PROGRAM_NV);
18 } else {
19 graphics_pipeline.Create();
20 glBindProgramPipeline(graphics_pipeline.handle);
21 }
22}
14 23
15ProgramManager::~ProgramManager() = default; 24ProgramManager::~ProgramManager() = default;
16 25
17void ProgramManager::Create() { 26void ProgramManager::BindCompute(GLuint program) {
18 graphics_pipeline.Create(); 27 if (use_assembly_programs) {
19 glBindProgramPipeline(graphics_pipeline.handle); 28 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
29 } else {
30 is_graphics_bound = false;
31 glUseProgram(program);
32 }
20} 33}
21 34
22void ProgramManager::BindGraphicsPipeline() { 35void ProgramManager::BindGraphicsPipeline() {
23 if (!is_graphics_bound) { 36 if (use_assembly_programs) {
24 is_graphics_bound = true; 37 UpdateAssemblyPrograms();
25 glUseProgram(0); 38 } else {
39 UpdateSourcePrograms();
26 } 40 }
41}
27 42
28 // Avoid updating the pipeline when values have no changed 43void ProgramManager::BindHostPipeline(GLuint pipeline) {
29 if (old_state == current_state) { 44 if (use_assembly_programs) {
30 return; 45 if (geometry_enabled) {
46 geometry_enabled = false;
47 old_state.geometry = 0;
48 glDisable(GL_GEOMETRY_PROGRAM_NV);
49 }
31 } 50 }
51 glBindProgramPipeline(pipeline);
52}
32 53
33 // Workaround for AMD bug 54void ProgramManager::RestoreGuestPipeline() {
34 static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | 55 if (use_assembly_programs) {
35 GL_FRAGMENT_SHADER_BIT}; 56 glBindProgramPipeline(0);
36 const GLuint handle = graphics_pipeline.handle; 57 } else {
37 glUseProgramStages(handle, all_used_stages, 0); 58 glBindProgramPipeline(graphics_pipeline.handle);
38 glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); 59 }
39 glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); 60}
40 glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); 61
62void ProgramManager::UpdateAssemblyPrograms() {
63 const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
64 if (current == old) {
65 return;
66 }
67 if (current == 0) {
68 if (enabled) {
69 enabled = false;
70 glDisable(stage);
71 }
72 return;
73 }
74 if (!enabled) {
75 enabled = true;
76 glEnable(stage);
77 }
78 glBindProgramARB(stage, current);
79 };
80
81 update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
82 update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
83 old_state.geometry);
84 update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
85 old_state.fragment);
41 86
42 old_state = current_state; 87 old_state = current_state;
43} 88}
44 89
45void ProgramManager::BindComputeShader(GLuint program) { 90void ProgramManager::UpdateSourcePrograms() {
46 is_graphics_bound = false; 91 if (!is_graphics_bound) {
47 glUseProgram(program); 92 is_graphics_bound = true;
93 glUseProgram(0);
94 }
95
96 const GLuint handle = graphics_pipeline.handle;
97 const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
98 if (current == old) {
99 return;
100 }
101 glUseProgramStages(handle, stage, current);
102 };
103 update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
104 update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
105 update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
106
107 old_state = current_state;
48} 108}
49 109
50void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { 110void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
@@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
54 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; 114 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
55} 115}
56 116
57} // namespace OpenGL::GLShader 117} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index d2e47f2a9..0f03b4f12 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,7 +11,9 @@
11#include "video_core/renderer_opengl/gl_resource_manager.h" 11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/maxwell_to_gl.h" 12#include "video_core/renderer_opengl/maxwell_to_gl.h"
13 13
14namespace OpenGL::GLShader { 14namespace OpenGL {
15
16class Device;
15 17
16/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned 18/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
17/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at 19/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
28 30
29class ProgramManager { 31class ProgramManager {
30public: 32public:
31 explicit ProgramManager(); 33 explicit ProgramManager(const Device& device);
32 ~ProgramManager(); 34 ~ProgramManager();
33 35
34 void Create(); 36 /// Binds a compute program
37 void BindCompute(GLuint program);
35 38
36 /// Updates the graphics pipeline and binds it. 39 /// Updates bound programs.
37 void BindGraphicsPipeline(); 40 void BindGraphicsPipeline();
38 41
39 /// Binds a compute shader. 42 /// Binds an OpenGL pipeline object unsynchronized with the guest state.
40 void BindComputeShader(GLuint program); 43 void BindHostPipeline(GLuint pipeline);
44
45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline();
41 47
42 void UseVertexShader(GLuint program) { 48 void UseVertexShader(GLuint program) {
43 current_state.vertex_shader = program; 49 current_state.vertex = program;
44 } 50 }
45 51
46 void UseGeometryShader(GLuint program) { 52 void UseGeometryShader(GLuint program) {
47 current_state.geometry_shader = program; 53 current_state.geometry = program;
48 } 54 }
49 55
50 void UseFragmentShader(GLuint program) { 56 void UseFragmentShader(GLuint program) {
51 current_state.fragment_shader = program; 57 current_state.fragment = program;
52 } 58 }
53 59
54private: 60private:
55 struct PipelineState { 61 struct PipelineState {
56 bool operator==(const PipelineState& rhs) const noexcept { 62 GLuint vertex = 0;
57 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && 63 GLuint geometry = 0;
58 geometry_shader == rhs.geometry_shader; 64 GLuint fragment = 0;
59 }
60
61 bool operator!=(const PipelineState& rhs) const noexcept {
62 return !operator==(rhs);
63 }
64
65 GLuint vertex_shader = 0;
66 GLuint fragment_shader = 0;
67 GLuint geometry_shader = 0;
68 }; 65 };
69 66
67 /// Update NV_gpu_program5 programs.
68 void UpdateAssemblyPrograms();
69
70 /// Update GLSL programs.
71 void UpdateSourcePrograms();
72
70 OGLPipeline graphics_pipeline; 73 OGLPipeline graphics_pipeline;
71 OGLPipeline compute_pipeline; 74
72 PipelineState current_state; 75 PipelineState current_state;
73 PipelineState old_state; 76 PipelineState old_state;
77
78 bool use_assembly_programs = false;
79
74 bool is_graphics_bound = true; 80 bool is_graphics_bound = true;
81
82 bool vertex_enabled = false;
83 bool geometry_enabled = false;
84 bool fragment_enabled = false;
75}; 85};
76 86
77} // namespace OpenGL::GLShader 87} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b2a179746..6b489e6db 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,7 +316,7 @@ public:
316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, 316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
317 Core::Frontend::GraphicsContext& context) 317 Core::Frontend::GraphicsContext& context)
318 : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, 318 : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
319 has_debug_tool{HasDebugTool()} {} 319 program_manager{device}, has_debug_tool{HasDebugTool()} {}
320 320
321RendererOpenGL::~RendererOpenGL() = default; 321RendererOpenGL::~RendererOpenGL() = default;
322 322
@@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
468 vertex_program.Create(true, false, vertex_shader.handle); 468 vertex_program.Create(true, false, vertex_shader.handle);
469 fragment_program.Create(true, false, fragment_shader.handle); 469 fragment_program.Create(true, false, fragment_shader.handle);
470 470
471 // Create program pipeline 471 pipeline.Create();
472 program_manager.Create(); 472 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
473 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
473 474
474 // Generate VBO handle for drawing 475 // Generate VBO handle for drawing
475 vertex_buffer.Create(); 476 vertex_buffer.Create();
@@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() {
508 if (rasterizer) { 509 if (rasterizer) {
509 return; 510 return;
510 } 511 }
511 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, 512 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
512 program_manager, state_tracker); 513 program_manager, state_tracker);
513} 514}
514 515
@@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
620 state_tracker.NotifyClipControl(); 621 state_tracker.NotifyClipControl();
621 state_tracker.NotifyAlphaTest(); 622 state_tracker.NotifyAlphaTest();
622 623
623 program_manager.UseVertexShader(vertex_program.handle); 624 program_manager.BindHostPipeline(pipeline.handle);
624 program_manager.UseGeometryShader(0);
625 program_manager.UseFragmentShader(fragment_program.handle);
626 program_manager.BindGraphicsPipeline();
627 625
628 glEnable(GL_CULL_FACE); 626 glEnable(GL_CULL_FACE);
629 if (screen_info.display_srgb) { 627 if (screen_info.display_srgb) {
@@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
665 663
666 glClear(GL_COLOR_BUFFER_BIT); 664 glClear(GL_COLOR_BUFFER_BIT);
667 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 665 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
666
667 program_manager.RestoreGuestPipeline();
668} 668}
669 669
670bool RendererOpenGL::TryPresent(int timeout_ms) { 670bool RendererOpenGL::TryPresent(int timeout_ms) {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 50b647661..61bf507f4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_shader_manager.h" 14#include "video_core/renderer_opengl/gl_shader_manager.h"
14#include "video_core/renderer_opengl/gl_state_tracker.h" 15#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -95,6 +96,7 @@ private:
95 Core::Frontend::EmuWindow& emu_window; 96 Core::Frontend::EmuWindow& emu_window;
96 Core::System& system; 97 Core::System& system;
97 Core::Frontend::GraphicsContext& context; 98 Core::Frontend::GraphicsContext& context;
99 const Device device;
98 100
99 StateTracker state_tracker{system}; 101 StateTracker state_tracker{system};
100 102
@@ -102,13 +104,14 @@ private:
102 OGLBuffer vertex_buffer; 104 OGLBuffer vertex_buffer;
103 OGLProgram vertex_program; 105 OGLProgram vertex_program;
104 OGLProgram fragment_program; 106 OGLProgram fragment_program;
107 OGLPipeline pipeline;
105 OGLFramebuffer screenshot_framebuffer; 108 OGLFramebuffer screenshot_framebuffer;
106 109
107 /// Display information for Switch screen 110 /// Display information for Switch screen
108 ScreenInfo screen_info; 111 ScreenInfo screen_info;
109 112
110 /// Global dummy shader pipeline 113 /// Global dummy shader pipeline
111 GLShader::ProgramManager program_manager; 114 ProgramManager program_manager;
112 115
113 /// OpenGL framebuffer data 116 /// OpenGL framebuffer data
114 std::vector<u8> gl_framebuffer_data; 117 std::vector<u8> gl_framebuffer_data;