summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Gravatar ameerj 2023-06-25 18:43:23 -0400
committer Gravatar ameerj 2023-06-25 18:43:23 -0400
commit 82107b33a2251eb4f55ab2006a8fc0cb47cc39e8 (patch)
tree 75eacbc92cbe0f276bfca41425447dfc9cc9d91a
parent Merge pull request #10891 from german77/sdl28v2 (diff)
download yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.gz
yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.xz
yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.zip
OpenGL: Add Local Memory warmup shader
-rw-r--r-- src/video_core/host_shaders/CMakeLists.txt 1
-rw-r--r-- src/video_core/host_shaders/opengl_lmem_warmup.comp 47
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 3
5 files changed, 62 insertions, 1 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2442c3c29..e61d9af80 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -33,6 +33,7 @@ set(SHADER_FILES
33 opengl_fidelityfx_fsr.frag 33 opengl_fidelityfx_fsr.frag
34 opengl_fidelityfx_fsr_easu.frag 34 opengl_fidelityfx_fsr_easu.frag
35 opengl_fidelityfx_fsr_rcas.frag 35 opengl_fidelityfx_fsr_rcas.frag
36 opengl_lmem_warmup.comp
36 opengl_present.frag 37 opengl_present.frag
37 opengl_present.vert 38 opengl_present.vert
38 opengl_present_scaleforce.frag 39 opengl_present_scaleforce.frag
diff --git a/src/video_core/host_shaders/opengl_lmem_warmup.comp b/src/video_core/host_shaders/opengl_lmem_warmup.comp
new file mode 100644
index 000000000..518268477
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_lmem_warmup.comp
@@ -0,0 +1,47 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

// This shader is a workaround for a quirk in NVIDIA OpenGL drivers:
// shaders using local memory see a great performance benefit if a shader that was dispatched
// before them had more local memory allocated.
// This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that
// subsequent shaders see the performance boost.

// NOTE: This shader does no actual meaningful work and returns immediately;
// it is simply a means to have the driver expect a shader using lots of local memory.

#version 450

// One invocation per workgroup.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

// Runtime value that feeds the loop bound and the local memory indices in main().
layout(location = 0) uniform uint uniform_data;

// Target of the dummy imageStore at the end of main().
layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image;

#define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler
#define NUM_LMEM_CONSTANTS 1
// Parenthesized so the macro still expands to a single value when used inside a larger
// expression (e.g. `2 * ARRAY_SIZE`).
#define ARRAY_SIZE (MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS)

uint lmem_0[ARRAY_SIZE];
const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0));

void main() {
    const uint global_id = gl_GlobalInvocationID.x;
    if (global_id <= 128) {
        // The shader is dispatched as 1x1x1 with a 1x1x1 local size, so global_id is 0.
        // This branch is therefore always taken, and nothing below actually executes.
        return;
    }
    // Write to lmem_0 at indices derived from the uniform.
    for (uint t = 0; t < uniform_data; t++) {
        const uint offset = (t * uniform_data);
        lmem_0[offset] = t;
    }
    // Read back from lmem_0 and from the constant table at an invocation-dependent index.
    const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x);
    const uint value = lmem_0[offset];
    const uint const_value = constant_values[offset / 4][offset % 4];
    const uvec4 color = uvec4(value + const_value);

    // A "side-effect" is needed so the variables don't get optimized out,
    // but this should never execute so there should be no clobbering of previously bound state.
    imageStore(dest_image, ivec3(gl_GlobalInvocationID), color);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index fc711c44a..d03288516 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -222,6 +222,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
222 gpu.TickWork(); 222 gpu.TickWork();
223 223
224 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; 224 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
225 program_manager.LocalMemoryWarmup();
225 pipeline->SetEngine(maxwell3d, gpu_memory); 226 pipeline->SetEngine(maxwell3d, gpu_memory);
226 pipeline->Configure(is_indexed); 227 pipeline->Configure(is_indexed);
227 228
@@ -371,6 +372,7 @@ void RasterizerOpenGL::DispatchCompute() {
371 if (!pipeline) { 372 if (!pipeline) {
372 return; 373 return;
373 } 374 }
375 program_manager.LocalMemoryWarmup();
374 pipeline->SetEngine(kepler_compute, gpu_memory); 376 pipeline->SetEngine(kepler_compute, gpu_memory);
375 pipeline->Configure(); 377 pipeline->Configure();
376 const auto& qmd{kepler_compute->launch_description}; 378 const auto& qmd{kepler_compute->launch_description};
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 98841ae65..2f6ba6823 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -3,7 +3,9 @@
3 3
4#include <glad/glad.h> 4#include <glad/glad.h>
5 5
6#include "video_core/host_shaders/opengl_lmem_warmup_comp.h"
6#include "video_core/renderer_opengl/gl_shader_manager.h" 7#include "video_core/renderer_opengl/gl_shader_manager.h"
8#include "video_core/renderer_opengl/gl_shader_util.h"
7 9
8namespace OpenGL { 10namespace OpenGL {
9 11
@@ -12,7 +14,8 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
12 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, 14 GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
13}; 15};
14 16
15ProgramManager::ProgramManager(const Device& device) { 17ProgramManager::ProgramManager(const Device& device)
18 : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) {
16 glCreateProgramPipelines(1, &pipeline.handle); 19 glCreateProgramPipelines(1, &pipeline.handle);
17 if (device.UseAssemblyShaders()) { 20 if (device.UseAssemblyShaders()) {
18 glEnable(GL_COMPUTE_PROGRAM_NV); 21 glEnable(GL_COMPUTE_PROGRAM_NV);
@@ -98,6 +101,11 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU
98 101
99void ProgramManager::RestoreGuestCompute() {} 102void ProgramManager::RestoreGuestCompute() {}
100 103
104void ProgramManager::LocalMemoryWarmup() {
105 BindComputeProgram(lmem_warmup_program.handle);
106 glDispatchCompute(1, 1, 1);
107}
108
101void ProgramManager::BindPipeline() { 109void ProgramManager::BindPipeline() {
102 if (!is_pipeline_bound) { 110 if (!is_pipeline_bound) {
103 is_pipeline_bound = true; 111 is_pipeline_bound = true;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 07ffab77f..852d8c88e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,6 +30,8 @@ public:
30 30
31 void RestoreGuestCompute(); 31 void RestoreGuestCompute();
32 32
33 void LocalMemoryWarmup();
34
33private: 35private:
34 void BindPipeline(); 36 void BindPipeline();
35 37
@@ -44,6 +46,7 @@ private:
44 u32 current_stage_mask = 0; 46 u32 current_stage_mask = 0;
45 std::array<GLuint, NUM_STAGES> current_programs{}; 47 std::array<GLuint, NUM_STAGES> current_programs{};
46 GLuint current_assembly_compute_program = 0; 48 GLuint current_assembly_compute_program = 0;
49 OGLProgram lmem_warmup_program;
47}; 50};
48 51
49} // namespace OpenGL 52} // namespace OpenGL