From 82107b33a2251eb4f55ab2006a8fc0cb47cc39e8 Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 25 Jun 2023 18:43:23 -0400 Subject: OpenGL: Add Local Memory warmup shader --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 ++ src/video_core/renderer_opengl/gl_shader_manager.cpp | 10 +++++++++- src/video_core/renderer_opengl/gl_shader_manager.h | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc711c44a..d03288516 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -222,6 +222,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { gpu.TickWork(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; + program_manager.LocalMemoryWarmup(); pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -371,6 +372,7 @@ void RasterizerOpenGL::DispatchCompute() { if (!pipeline) { return; } + program_manager.LocalMemoryWarmup(); pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 98841ae65..2f6ba6823 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -3,7 +3,9 @@ #include +#include "video_core/host_shaders/opengl_lmem_warmup_comp.h" #include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" namespace OpenGL { @@ -12,7 +14,8 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; -ProgramManager::ProgramManager(const Device& device) { +ProgramManager::ProgramManager(const Device& device) + : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) { glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); @@ -98,6 +101,11 @@ void ProgramManager::BindAssemblyPrograms(std::span current_programs{}; GLuint current_assembly_compute_program = 0; + OGLProgram lmem_warmup_program; }; } // namespace OpenGL -- cgit v1.2.3 From 405eae3734dd6bfb259df0afceecf4de1f1262ce Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 25 Jun 2023 18:59:33 -0400 Subject: shaders: Track local memory usage --- src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 1 + src/video_core/renderer_opengl/gl_compute_pipeline.h | 5 +++++ src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 1 + src/video_core/renderer_opengl/gl_graphics_pipeline.h | 5 +++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++-- 5 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 3151c0db8..f9ca55c36 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac writes_global_memory = !use_storage_buffers && std::ranges::any_of(info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + uses_local_memory = info.uses_local_memory; if (force_context_flush) { std::scoped_lock lock{built_mutex}; built_fence.Create(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 9bcc72b59..c26b4fa5e 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -59,6 +59,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool UsesLocalMemory() const noexcept { + return uses_local_memory; + } + void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, Tegra::MemoryManager* gpu_memory_) { kepler_compute = kepler_compute_; @@ -84,6 +88,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + bool uses_local_memory{}; std::mutex built_mutex; std::condition_variable built_condvar; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c58f760b8..23a48c6fe 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory |= std::ranges::any_of( info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + uses_local_memory |= info.uses_local_memory; } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 7bab3be0a..7b3d7eae8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -98,6 +98,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool UsesLocalMemory() const noexcept { + return uses_local_memory; + } + [[nodiscard]] bool IsBuilt() noexcept; template @@ -146,6 +150,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + bool uses_local_memory{}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d03288516..edf527f2d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -222,7 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { gpu.TickWork(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program_manager.LocalMemoryWarmup(); + if (pipeline->UsesLocalMemory()) { + program_manager.LocalMemoryWarmup(); + } pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -372,7 +374,9 @@ void RasterizerOpenGL::DispatchCompute() { if (!pipeline) { return; } - program_manager.LocalMemoryWarmup(); + if (pipeline->UsesLocalMemory()) { + program_manager.LocalMemoryWarmup(); + } pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; -- cgit v1.2.3 From 4f160633d369b702a45ace9b6ff133312761c5f8 Mon Sep 17 00:00:00 2001 From: ameerj Date: Sun, 25 Jun 2023 19:06:51 -0400 Subject: OpenGL: Limit lmem warmup to NVIDIA :frog: --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ src/video_core/renderer_opengl/gl_shader_manager.cpp | 13 +++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'src/video_core/renderer_opengl') diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 03d234f2f..33e63c17d 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -194,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_bool_ref_bug = true; } } + has_lmem_perf_bug = is_nvidia; strict_context_required = emu_window.StrictContextRequired(); // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad27264e5..a5a6bbbba 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -192,6 +192,10 @@ public: return supports_conditional_barriers; } + bool HasLmemPerfBug() const { + return has_lmem_perf_bug; + } + private: static bool TestVariableAoffi(); static bool TestPreciseBug(); @@ -238,6 +242,7 @@ private: bool can_report_memory{}; bool strict_context_required{}; bool supports_conditional_barriers{}; + bool has_lmem_perf_bug{}; std::string vendor_name; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 2f6ba6823..03d4b9d06 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -14,12 +14,15 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; -ProgramManager::ProgramManager(const Device& device) - : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) { +ProgramManager::ProgramManager(const Device& device) { glCreateProgramPipelines(1, &pipeline.handle); if (device.UseAssemblyShaders()) { glEnable(GL_COMPUTE_PROGRAM_NV); } + if (device.HasLmemPerfBug()) { + lmem_warmup_program = + CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER); + } } void ProgramManager::BindComputeProgram(GLuint program) { @@ -102,8 +105,10 @@ void ProgramManager::BindAssemblyPrograms(std::span