diff options
| author | 2023-06-25 18:43:23 -0400 | |
|---|---|---|
| committer | 2023-06-25 18:43:23 -0400 | |
| commit | 82107b33a2251eb4f55ab2006a8fc0cb47cc39e8 (patch) | |
| tree | 75eacbc92cbe0f276bfca41425447dfc9cc9d91a /src/video_core/host_shaders | |
| parent | Merge pull request #10891 from german77/sdl28v2 (diff) | |
| download | yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.gz yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.tar.xz yuzu-82107b33a2251eb4f55ab2006a8fc0cb47cc39e8.zip | |
OpenGL: Add Local Memory warmup shader
Diffstat (limited to 'src/video_core/host_shaders')
| -rw-r--r-- | src/video_core/host_shaders/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/host_shaders/opengl_lmem_warmup.comp | 47 |
2 files changed, 48 insertions, 0 deletions
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2442c3c29..e61d9af80 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -33,6 +33,7 @@ set(SHADER_FILES | |||
| 33 | opengl_fidelityfx_fsr.frag | 33 | opengl_fidelityfx_fsr.frag |
| 34 | opengl_fidelityfx_fsr_easu.frag | 34 | opengl_fidelityfx_fsr_easu.frag |
| 35 | opengl_fidelityfx_fsr_rcas.frag | 35 | opengl_fidelityfx_fsr_rcas.frag |
| 36 | opengl_lmem_warmup.comp | ||
| 36 | opengl_present.frag | 37 | opengl_present.frag |
| 37 | opengl_present.vert | 38 | opengl_present.vert |
| 38 | opengl_present_scaleforce.frag | 39 | opengl_present_scaleforce.frag |
diff --git a/src/video_core/host_shaders/opengl_lmem_warmup.comp b/src/video_core/host_shaders/opengl_lmem_warmup.comp new file mode 100644 index 000000000..518268477 --- /dev/null +++ b/src/video_core/host_shaders/opengl_lmem_warmup.comp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | // This shader is a workaround for a quirk in NVIDIA OpenGL drivers. | ||
| 5 | // Shaders using local memory see a great performance benefit if a shader dispatched | ||
| 6 | // before them had more local memory allocated. | ||
| 7 | // This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that | ||
| 8 | // subsequent shaders see the performance boost. | ||
| 9 | |||
| 10 | // NOTE: This shader does no meaningful work and returns immediately; | ||
| 11 | // it is simply a means to make the driver expect a shader that uses a lot of local memory. | ||
| 12 | |||
| 13 | #version 450 | ||
| 14 | |||
| 15 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; | ||
| 16 | |||
| 17 | layout(location = 0) uniform uint uniform_data; // Loop bound and index stride used by main() | ||
| 18 | |||
| 19 | layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image; // Target of the imageStore in main() | ||
| 20 | |||
| 21 | #define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in NVIDIA's GLSL compiler | ||
| 22 | #define NUM_LMEM_CONSTANTS 1 | ||
| 23 | #define ARRAY_SIZE (MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS) // Parenthesized so it expands as one value in any expression | ||
| 24 | |||
| 25 | uint lmem_0[ARRAY_SIZE]; // Large array written and read by main() | ||
| 26 | const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0)); | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | const uint global_id = gl_GlobalInvocationID.x; | ||
| 30 | if (global_id <= 128) { | ||
| 31 | // The shader is called with a dispatch of 1x1x1, so this condition should always | ||
| 32 | // hold: the shader returns here and none of the code below actually executes. | ||
| 33 | return; | ||
| 34 | } | ||
| 35 | for (uint t = 0; t < uniform_data; t++) { | ||
| 36 | const uint offset = (t * uniform_data); | ||
| 37 | lmem_0[offset] = t; | ||
| 38 | } | ||
| 39 | const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x); | ||
| 40 | const uint value = lmem_0[offset]; | ||
| 41 | const uint const_value = constant_values[offset / 4][offset % 4]; | ||
| 42 | const uvec4 color = uvec4(value + const_value); | ||
| 43 | |||
| 44 | // A side effect (the image store below) is needed so that the values read from lmem_0 and | ||
| 45 | // constant_values don't get optimized out; it should never execute, so no previously bound state is clobbered. | ||
| 46 | imageStore(dest_image, ivec3(gl_GlobalInvocationID), color); | ||
| 47 | } | ||