diff options
| author | 2023-06-27 11:21:10 -0400 | |
|---|---|---|
| committer | 2023-06-27 11:21:10 -0400 | |
| commit | dafbc86366f8bcd9153949db4d141ec489928f81 (patch) | |
| tree | 8496cb635eef6967a99d0b7dfad41419a24b5efd /src/video_core/renderer_opengl | |
| parent | Merge pull request #10925 from t895/fs-agony (diff) | |
| parent | OpenGL: Limit lmem warmup to NVIDIA (diff) | |
| download | yuzu-dafbc86366f8bcd9153949db4d141ec489928f81.tar.gz yuzu-dafbc86366f8bcd9153949db4d141ec489928f81.tar.xz yuzu-dafbc86366f8bcd9153949db4d141ec489928f81.zip | |
Merge pull request #10916 from ameerj/lolmem
OpenGL: Add Local Memory warmup shader for Nvidia
Diffstat (limited to 'src/video_core/renderer_opengl')
9 files changed, 40 insertions, 0 deletions
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 3151c0db8..f9ca55c36 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac | |||
| 63 | writes_global_memory = !use_storage_buffers && | 63 | writes_global_memory = !use_storage_buffers && |
| 64 | std::ranges::any_of(info.storage_buffers_descriptors, | 64 | std::ranges::any_of(info.storage_buffers_descriptors, |
| 65 | [](const auto& desc) { return desc.is_written; }); | 65 | [](const auto& desc) { return desc.is_written; }); |
| 66 | uses_local_memory = info.uses_local_memory; | ||
| 66 | if (force_context_flush) { | 67 | if (force_context_flush) { |
| 67 | std::scoped_lock lock{built_mutex}; | 68 | std::scoped_lock lock{built_mutex}; |
| 68 | built_fence.Create(); | 69 | built_fence.Create(); |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 9bcc72b59..c26b4fa5e 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h | |||
| @@ -59,6 +59,10 @@ public: | |||
| 59 | return writes_global_memory; | 59 | return writes_global_memory; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | [[nodiscard]] bool UsesLocalMemory() const noexcept { | ||
| 63 | return uses_local_memory; | ||
| 64 | } | ||
| 65 | |||
| 62 | void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, | 66 | void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, |
| 63 | Tegra::MemoryManager* gpu_memory_) { | 67 | Tegra::MemoryManager* gpu_memory_) { |
| 64 | kepler_compute = kepler_compute_; | 68 | kepler_compute = kepler_compute_; |
| @@ -84,6 +88,7 @@ private: | |||
| 84 | 88 | ||
| 85 | bool use_storage_buffers{}; | 89 | bool use_storage_buffers{}; |
| 86 | bool writes_global_memory{}; | 90 | bool writes_global_memory{}; |
| 91 | bool uses_local_memory{}; | ||
| 87 | 92 | ||
| 88 | std::mutex built_mutex; | 93 | std::mutex built_mutex; |
| 89 | std::condition_variable built_condvar; | 94 | std::condition_variable built_condvar; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 03d234f2f..33e63c17d 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -194,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 194 | has_bool_ref_bug = true; | 194 | has_bool_ref_bug = true; |
| 195 | } | 195 | } |
| 196 | } | 196 | } |
| 197 | has_lmem_perf_bug = is_nvidia; | ||
| 197 | 198 | ||
| 198 | strict_context_required = emu_window.StrictContextRequired(); | 199 | strict_context_required = emu_window.StrictContextRequired(); |
| 199 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. | 200 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad27264e5..a5a6bbbba 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -192,6 +192,10 @@ public: | |||
| 192 | return supports_conditional_barriers; | 192 | return supports_conditional_barriers; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | bool HasLmemPerfBug() const { | ||
| 196 | return has_lmem_perf_bug; | ||
| 197 | } | ||
| 198 | |||
| 195 | private: | 199 | private: |
| 196 | static bool TestVariableAoffi(); | 200 | static bool TestVariableAoffi(); |
| 197 | static bool TestPreciseBug(); | 201 | static bool TestPreciseBug(); |
| @@ -238,6 +242,7 @@ private: | |||
| 238 | bool can_report_memory{}; | 242 | bool can_report_memory{}; |
| 239 | bool strict_context_required{}; | 243 | bool strict_context_required{}; |
| 240 | bool supports_conditional_barriers{}; | 244 | bool supports_conditional_barriers{}; |
| 245 | bool has_lmem_perf_bug{}; | ||
| 241 | 246 | ||
| 242 | std::string vendor_name; | 247 | std::string vendor_name; |
| 243 | }; | 248 | }; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c58f760b8..23a48c6fe 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 215 | 215 | ||
| 216 | writes_global_memory |= std::ranges::any_of( | 216 | writes_global_memory |= std::ranges::any_of( |
| 217 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | 217 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); |
| 218 | uses_local_memory |= info.uses_local_memory; | ||
| 218 | } | 219 | } |
| 219 | ASSERT(num_textures <= MAX_TEXTURES); | 220 | ASSERT(num_textures <= MAX_TEXTURES); |
| 220 | ASSERT(num_images <= MAX_IMAGES); | 221 | ASSERT(num_images <= MAX_IMAGES); |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 7bab3be0a..7b3d7eae8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -98,6 +98,10 @@ public: | |||
| 98 | return writes_global_memory; | 98 | return writes_global_memory; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | [[nodiscard]] bool UsesLocalMemory() const noexcept { | ||
| 102 | return uses_local_memory; | ||
| 103 | } | ||
| 104 | |||
| 101 | [[nodiscard]] bool IsBuilt() noexcept; | 105 | [[nodiscard]] bool IsBuilt() noexcept; |
| 102 | 106 | ||
| 103 | template <typename Spec> | 107 | template <typename Spec> |
| @@ -146,6 +150,7 @@ private: | |||
| 146 | 150 | ||
| 147 | bool use_storage_buffers{}; | 151 | bool use_storage_buffers{}; |
| 148 | bool writes_global_memory{}; | 152 | bool writes_global_memory{}; |
| 153 | bool uses_local_memory{}; | ||
| 149 | 154 | ||
| 150 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; | 155 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; |
| 151 | GLsizei num_xfb_attribs{}; | 156 | GLsizei num_xfb_attribs{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc711c44a..edf527f2d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -222,6 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 222 | gpu.TickWork(); | 222 | gpu.TickWork(); |
| 223 | 223 | ||
| 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 225 | if (pipeline->UsesLocalMemory()) { | ||
| 226 | program_manager.LocalMemoryWarmup(); | ||
| 227 | } | ||
| 225 | pipeline->SetEngine(maxwell3d, gpu_memory); | 228 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 226 | pipeline->Configure(is_indexed); | 229 | pipeline->Configure(is_indexed); |
| 227 | 230 | ||
| @@ -371,6 +374,9 @@ void RasterizerOpenGL::DispatchCompute() { | |||
| 371 | if (!pipeline) { | 374 | if (!pipeline) { |
| 372 | return; | 375 | return; |
| 373 | } | 376 | } |
| 377 | if (pipeline->UsesLocalMemory()) { | ||
| 378 | program_manager.LocalMemoryWarmup(); | ||
| 379 | } | ||
| 374 | pipeline->SetEngine(kepler_compute, gpu_memory); | 380 | pipeline->SetEngine(kepler_compute, gpu_memory); |
| 375 | pipeline->Configure(); | 381 | pipeline->Configure(); |
| 376 | const auto& qmd{kepler_compute->launch_description}; | 382 | const auto& qmd{kepler_compute->launch_description}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 98841ae65..03d4b9d06 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -3,7 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #include <glad/glad.h> | 4 | #include <glad/glad.h> |
| 5 | 5 | ||
| 6 | #include "video_core/host_shaders/opengl_lmem_warmup_comp.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 7 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 7 | 9 | ||
| 8 | namespace OpenGL { | 10 | namespace OpenGL { |
| 9 | 11 | ||
| @@ -17,6 +19,10 @@ ProgramManager::ProgramManager(const Device& device) { | |||
| 17 | if (device.UseAssemblyShaders()) { | 19 | if (device.UseAssemblyShaders()) { |
| 18 | glEnable(GL_COMPUTE_PROGRAM_NV); | 20 | glEnable(GL_COMPUTE_PROGRAM_NV); |
| 19 | } | 21 | } |
| 22 | if (device.HasLmemPerfBug()) { | ||
| 23 | lmem_warmup_program = | ||
| 24 | CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER); | ||
| 25 | } | ||
| 20 | } | 26 | } |
| 21 | 27 | ||
| 22 | void ProgramManager::BindComputeProgram(GLuint program) { | 28 | void ProgramManager::BindComputeProgram(GLuint program) { |
| @@ -98,6 +104,13 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU | |||
| 98 | 104 | ||
| 99 | void ProgramManager::RestoreGuestCompute() {} | 105 | void ProgramManager::RestoreGuestCompute() {} |
| 100 | 106 | ||
| 107 | void ProgramManager::LocalMemoryWarmup() { | ||
| 108 | if (lmem_warmup_program.handle != 0) { | ||
| 109 | BindComputeProgram(lmem_warmup_program.handle); | ||
| 110 | glDispatchCompute(1, 1, 1); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 101 | void ProgramManager::BindPipeline() { | 114 | void ProgramManager::BindPipeline() { |
| 102 | if (!is_pipeline_bound) { | 115 | if (!is_pipeline_bound) { |
| 103 | is_pipeline_bound = true; | 116 | is_pipeline_bound = true; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 07ffab77f..852d8c88e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -30,6 +30,8 @@ public: | |||
| 30 | 30 | ||
| 31 | void RestoreGuestCompute(); | 31 | void RestoreGuestCompute(); |
| 32 | 32 | ||
| 33 | void LocalMemoryWarmup(); | ||
| 34 | |||
| 33 | private: | 35 | private: |
| 34 | void BindPipeline(); | 36 | void BindPipeline(); |
| 35 | 37 | ||
| @@ -44,6 +46,7 @@ private: | |||
| 44 | u32 current_stage_mask = 0; | 46 | u32 current_stage_mask = 0; |
| 45 | std::array<GLuint, NUM_STAGES> current_programs{}; | 47 | std::array<GLuint, NUM_STAGES> current_programs{}; |
| 46 | GLuint current_assembly_compute_program = 0; | 48 | GLuint current_assembly_compute_program = 0; |
| 49 | OGLProgram lmem_warmup_program; | ||
| 47 | }; | 50 | }; |
| 48 | 51 | ||
| 49 | } // namespace OpenGL | 52 | } // namespace OpenGL |