diff options
| author | 2023-06-25 18:43:23 -0400 | |
|---|---|---|
| committer | 2023-06-25 18:43:23 -0400 | |
| commit | 82107b33a2251eb4f55ab2006a8fc0cb47cc39e8 (patch) | |
| tree | 75eacbc92cbe0f276bfca41425447dfc9cc9d91a | |
| parent | Merge pull request #10891 from german77/sdl28v2 (diff) | |
| download | yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.gz yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.xz yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.zip | |
OpenGL: Add Local Memory warmup shader
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/host_shaders/opengl_lmem_warmup.comp | 47 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 3 |
5 files changed, 62 insertions, 1 deletion
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2442c3c29..e61d9af80 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -33,6 +33,7 @@ set(SHADER_FILES | |||
| 33 | opengl_fidelityfx_fsr.frag | 33 | opengl_fidelityfx_fsr.frag |
| 34 | opengl_fidelityfx_fsr_easu.frag | 34 | opengl_fidelityfx_fsr_easu.frag |
| 35 | opengl_fidelityfx_fsr_rcas.frag | 35 | opengl_fidelityfx_fsr_rcas.frag |
| 36 | opengl_lmem_warmup.comp | ||
| 36 | opengl_present.frag | 37 | opengl_present.frag |
| 37 | opengl_present.vert | 38 | opengl_present.vert |
| 38 | opengl_present_scaleforce.frag | 39 | opengl_present_scaleforce.frag |
diff --git a/src/video_core/host_shaders/opengl_lmem_warmup.comp b/src/video_core/host_shaders/opengl_lmem_warmup.comp new file mode 100644 index 000000000..518268477 --- /dev/null +++ b/src/video_core/host_shaders/opengl_lmem_warmup.comp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | // This shader is a workaround for a quirk in NVIDIA OpenGL drivers. | ||
| 5 | // Shaders using local memory see a great performance benefit if a shader that was dispatched | ||
| 6 | // before them had more local memory allocated. | ||
| 7 | // This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that | ||
| 8 | // subsequent shaders see the performance boost. | ||
| 9 | |||
| 10 | // NOTE: This shader does no actual meaningful work and returns immediately; | ||
| 11 | // it is simply a means to have the driver expect a shader using lots of local memory. | ||
| 12 | |||
| 13 | #version 450 | ||
| 14 | |||
| 15 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; | ||
| 16 | |||
| 17 | layout(location = 0) uniform uint uniform_data; | ||
| 18 | |||
| 19 | layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image; | ||
| 20 | |||
| 21 | #define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler | ||
| 22 | #define NUM_LMEM_CONSTANTS 1 | ||
| 23 | #define ARRAY_SIZE MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS | ||
| 24 | |||
| 25 | uint lmem_0[ARRAY_SIZE]; | ||
| 26 | const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0)); | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | const uint global_id = gl_GlobalInvocationID.x; | ||
| 30 | if (global_id <= 128) { | ||
| 31 | // Since the shader is only ever dispatched as 1x1x1, global_id is always 0, | ||
| 32 | // so this branch is always taken and the code below never actually executes | ||
| 33 | return; | ||
| 34 | } | ||
| 35 | for (uint t = 0; t < uniform_data; t++) { | ||
| 36 | const uint offset = (t * uniform_data); | ||
| 37 | lmem_0[offset] = t; | ||
| 38 | } | ||
| 39 | const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x); | ||
| 40 | const uint value = lmem_0[offset]; | ||
| 41 | const uint const_value = constant_values[offset / 4][offset % 4]; | ||
| 42 | const uvec4 color = uvec4(value + const_value); | ||
| 43 | |||
| 44 | // A "side-effect" is needed so the variables don't get optimized out, | ||
| 45 | // but this should never execute so there should be no clobbering of previously bound state. | ||
| 46 | imageStore(dest_image, ivec3(gl_GlobalInvocationID), color); | ||
| 47 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc711c44a..d03288516 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -222,6 +222,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 222 | gpu.TickWork(); | 222 | gpu.TickWork(); |
| 223 | 223 | ||
| 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 225 | program_manager.LocalMemoryWarmup(); | ||
| 225 | pipeline->SetEngine(maxwell3d, gpu_memory); | 226 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 226 | pipeline->Configure(is_indexed); | 227 | pipeline->Configure(is_indexed); |
| 227 | 228 | ||
| @@ -371,6 +372,7 @@ void RasterizerOpenGL::DispatchCompute() { | |||
| 371 | if (!pipeline) { | 372 | if (!pipeline) { |
| 372 | return; | 373 | return; |
| 373 | } | 374 | } |
| 375 | program_manager.LocalMemoryWarmup(); | ||
| 374 | pipeline->SetEngine(kepler_compute, gpu_memory); | 376 | pipeline->SetEngine(kepler_compute, gpu_memory); |
| 375 | pipeline->Configure(); | 377 | pipeline->Configure(); |
| 376 | const auto& qmd{kepler_compute->launch_description}; | 378 | const auto& qmd{kepler_compute->launch_description}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 98841ae65..2f6ba6823 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -3,7 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #include <glad/glad.h> | 4 | #include <glad/glad.h> |
| 5 | 5 | ||
| 6 | #include "video_core/host_shaders/opengl_lmem_warmup_comp.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 7 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 7 | 9 | ||
| 8 | namespace OpenGL { | 10 | namespace OpenGL { |
| 9 | 11 | ||
| @@ -12,7 +14,8 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ | |||
| 12 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | 14 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, |
| 13 | }; | 15 | }; |
| 14 | 16 | ||
| 15 | ProgramManager::ProgramManager(const Device& device) { | 17 | ProgramManager::ProgramManager(const Device& device) |
| 18 | : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) { | ||
| 16 | glCreateProgramPipelines(1, &pipeline.handle); | 19 | glCreateProgramPipelines(1, &pipeline.handle); |
| 17 | if (device.UseAssemblyShaders()) { | 20 | if (device.UseAssemblyShaders()) { |
| 18 | glEnable(GL_COMPUTE_PROGRAM_NV); | 21 | glEnable(GL_COMPUTE_PROGRAM_NV); |
| @@ -98,6 +101,11 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU | |||
| 98 | 101 | ||
| 99 | void ProgramManager::RestoreGuestCompute() {} | 102 | void ProgramManager::RestoreGuestCompute() {} |
| 100 | 103 | ||
| 104 | void ProgramManager::LocalMemoryWarmup() { | ||
| 105 | BindComputeProgram(lmem_warmup_program.handle); | ||
| 106 | glDispatchCompute(1, 1, 1); | ||
| 107 | } | ||
| 108 | |||
| 101 | void ProgramManager::BindPipeline() { | 109 | void ProgramManager::BindPipeline() { |
| 102 | if (!is_pipeline_bound) { | 110 | if (!is_pipeline_bound) { |
| 103 | is_pipeline_bound = true; | 111 | is_pipeline_bound = true; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 07ffab77f..852d8c88e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -30,6 +30,8 @@ public: | |||
| 30 | 30 | ||
| 31 | void RestoreGuestCompute(); | 31 | void RestoreGuestCompute(); |
| 32 | 32 | ||
| 33 | void LocalMemoryWarmup(); | ||
| 34 | |||
| 33 | private: | 35 | private: |
| 34 | void BindPipeline(); | 36 | void BindPipeline(); |
| 35 | 37 | ||
| @@ -44,6 +46,7 @@ private: | |||
| 44 | u32 current_stage_mask = 0; | 46 | u32 current_stage_mask = 0; |
| 45 | std::array<GLuint, NUM_STAGES> current_programs{}; | 47 | std::array<GLuint, NUM_STAGES> current_programs{}; |
| 46 | GLuint current_assembly_compute_program = 0; | 48 | GLuint current_assembly_compute_program = 0; |
| 49 | OGLProgram lmem_warmup_program; | ||
| 47 | }; | 50 | }; |
| 48 | 51 | ||
| 49 | } // namespace OpenGL | 52 | } // namespace OpenGL |