diff options
| author | 2020-07-10 13:36:38 +1000 | |
|---|---|---|
| committer | 2020-07-17 14:24:57 +1000 | |
| commit | 468bd9c1b0f9e74f7c096b127a94a94e4ed7caec (patch) | |
| tree | 50a0f28b7c817222247369400bedf5de1ccc4e19 /src/video_core | |
| parent | Merge pull request #4347 from lioncash/logging (diff) | |
| download | yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.gz yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.tar.xz yuzu-468bd9c1b0f9e74f7c096b127a94a94e4ed7caec.zip | |
async shaders
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 181 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 36 | ||||
| -rw-r--r-- | src/video_core/shader/async_shaders.cpp | 170 | ||||
| -rw-r--r-- | src/video_core/shader/async_shaders.h | 107 | ||||
| -rw-r--r-- | src/video_core/shader_notify.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/shader_notify.h | 29 |
14 files changed, 571 insertions, 58 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 21c46a567..3cd896a0f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -98,6 +98,8 @@ add_library(video_core STATIC | |||
| 98 | sampler_cache.cpp | 98 | sampler_cache.cpp |
| 99 | sampler_cache.h | 99 | sampler_cache.h |
| 100 | shader_cache.h | 100 | shader_cache.h |
| 101 | shader_notify.cpp | ||
| 102 | shader_notify.h | ||
| 101 | shader/decode/arithmetic.cpp | 103 | shader/decode/arithmetic.cpp |
| 102 | shader/decode/arithmetic_immediate.cpp | 104 | shader/decode/arithmetic_immediate.cpp |
| 103 | shader/decode/bfe.cpp | 105 | shader/decode/bfe.cpp |
| @@ -128,6 +130,8 @@ add_library(video_core STATIC | |||
| 128 | shader/decode/other.cpp | 130 | shader/decode/other.cpp |
| 129 | shader/ast.cpp | 131 | shader/ast.cpp |
| 130 | shader/ast.h | 132 | shader/ast.h |
| 133 | shader/async_shaders.cpp | ||
| 134 | shader/async_shaders.h | ||
| 131 | shader/compiler_settings.cpp | 135 | shader/compiler_settings.cpp |
| 132 | shader/compiler_settings.h | 136 | shader/compiler_settings.h |
| 133 | shader/control_flow.cpp | 137 | shader/control_flow.cpp |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 758bfe148..8e19c3373 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/gpu.h" | 20 | #include "video_core/gpu.h" |
| 21 | #include "video_core/memory_manager.h" | 21 | #include "video_core/memory_manager.h" |
| 22 | #include "video_core/renderer_base.h" | 22 | #include "video_core/renderer_base.h" |
| 23 | #include "video_core/shader_notify.h" | ||
| 23 | #include "video_core/video_core.h" | 24 | #include "video_core/video_core.h" |
| 24 | 25 | ||
| 25 | namespace Tegra { | 26 | namespace Tegra { |
| @@ -36,6 +37,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render | |||
| 36 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); | 37 | kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); |
| 37 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); | 38 | maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); |
| 38 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | 39 | kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); |
| 40 | shader_notify = std::make_unique<VideoCore::ShaderNotify>(); | ||
| 39 | } | 41 | } |
| 40 | 42 | ||
| 41 | GPU::~GPU() = default; | 43 | GPU::~GPU() = default; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 2c42483bd..8d04d9fd9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -33,6 +33,7 @@ class System; | |||
| 33 | 33 | ||
| 34 | namespace VideoCore { | 34 | namespace VideoCore { |
| 35 | class RendererBase; | 35 | class RendererBase; |
| 36 | class ShaderNotify; | ||
| 36 | } // namespace VideoCore | 37 | } // namespace VideoCore |
| 37 | 38 | ||
| 38 | namespace Tegra { | 39 | namespace Tegra { |
| @@ -207,6 +208,14 @@ public: | |||
| 207 | return *renderer; | 208 | return *renderer; |
| 208 | } | 209 | } |
| 209 | 210 | ||
| 211 | VideoCore::ShaderNotify& ShaderNotify() { | ||
| 212 | return *shader_notify; | ||
| 213 | } | ||
| 214 | |||
| 215 | const VideoCore::ShaderNotify& ShaderNotify() const { | ||
| 216 | return *shader_notify; | ||
| 217 | } | ||
| 218 | |||
| 210 | // Waits for the GPU to finish working | 219 | // Waits for the GPU to finish working |
| 211 | virtual void WaitIdle() const = 0; | 220 | virtual void WaitIdle() const = 0; |
| 212 | 221 | ||
| @@ -347,6 +356,8 @@ private: | |||
| 347 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 356 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 348 | /// Inline memory engine | 357 | /// Inline memory engine |
| 349 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 358 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 359 | /// Shader build notifier | ||
| 360 | std::unique_ptr<VideoCore::ShaderNotify> shader_notify; | ||
| 350 | 361 | ||
| 351 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; | 362 | std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{}; |
| 352 | 363 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c1f20f0ab..7c27eed6d 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -233,6 +233,8 @@ Device::Device() | |||
| 233 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && | 233 | GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && |
| 234 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; | 234 | GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; |
| 235 | 235 | ||
| 236 | use_asynchronous_shaders = Settings::values.use_asynchronous_shaders; | ||
| 237 | |||
| 236 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 238 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 237 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 239 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
| 238 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 240 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e1d811966..94d38d7d1 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -104,6 +104,10 @@ public: | |||
| 104 | return use_assembly_shaders; | 104 | return use_assembly_shaders; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | bool UseAsynchronousShaders() const { | ||
| 108 | return use_asynchronous_shaders; | ||
| 109 | } | ||
| 110 | |||
| 107 | private: | 111 | private: |
| 108 | static bool TestVariableAoffi(); | 112 | static bool TestVariableAoffi(); |
| 109 | static bool TestPreciseBug(); | 113 | static bool TestPreciseBug(); |
| @@ -127,6 +131,7 @@ private: | |||
| 127 | bool has_fast_buffer_sub_data{}; | 131 | bool has_fast_buffer_sub_data{}; |
| 128 | bool has_nv_viewport_array2{}; | 132 | bool has_nv_viewport_array2{}; |
| 129 | bool use_assembly_shaders{}; | 133 | bool use_assembly_shaders{}; |
| 134 | bool use_asynchronous_shaders{}; | ||
| 130 | }; | 135 | }; |
| 131 | 136 | ||
| 132 | } // namespace OpenGL | 137 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e960a0ef1..fbd11e28f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -149,7 +149,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 149 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | 149 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 150 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | 150 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 151 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | 151 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 152 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | 152 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, |
| 153 | async_shaders{emu_window} { | ||
| 153 | CheckExtensions(); | 154 | CheckExtensions(); |
| 154 | 155 | ||
| 155 | unified_uniform_buffer.Create(); | 156 | unified_uniform_buffer.Create(); |
| @@ -162,6 +163,23 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 162 | nullptr, 0); | 163 | nullptr, 0); |
| 163 | } | 164 | } |
| 164 | } | 165 | } |
| 166 | |||
| 167 | if (device.UseAsynchronousShaders()) { | ||
| 168 | // Max worker threads we should allow | ||
| 169 | constexpr auto MAX_THREADS = 8u; | ||
| 170 | // Amount of threads we should reserve for other parts of yuzu | ||
| 171 | constexpr auto RESERVED_THREADS = 6u; | ||
| 172 | // Get the amount of threads we can use(this can return zero) | ||
| 173 | const auto cpu_thread_count = | ||
| 174 | std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); | ||
| 175 | // Deduce how many "extra" threads we have to use. | ||
| 176 | const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS; | ||
| 177 | // Always allow at least 1 thread regardless of our settings | ||
| 178 | const auto max_worker_count = std::max(1u, max_threads_unused); | ||
| 179 | // Don't use more than MAX_THREADS | ||
| 180 | const auto worker_count = std::min(max_worker_count, MAX_THREADS); | ||
| 181 | async_shaders.AllocateWorkers(worker_count); | ||
| 182 | } | ||
| 165 | } | 183 | } |
| 166 | 184 | ||
| 167 | RasterizerOpenGL::~RasterizerOpenGL() { | 185 | RasterizerOpenGL::~RasterizerOpenGL() { |
| @@ -336,7 +354,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 336 | continue; | 354 | continue; |
| 337 | } | 355 | } |
| 338 | 356 | ||
| 339 | Shader* const shader = shader_cache.GetStageProgram(program); | 357 | Shader* shader = shader_cache.GetStageProgram(program, async_shaders); |
| 340 | 358 | ||
| 341 | if (device.UseAssemblyShaders()) { | 359 | if (device.UseAssemblyShaders()) { |
| 342 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this | 360 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this |
| @@ -353,7 +371,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 353 | SetupDrawTextures(stage, shader); | 371 | SetupDrawTextures(stage, shader); |
| 354 | SetupDrawImages(stage, shader); | 372 | SetupDrawImages(stage, shader); |
| 355 | 373 | ||
| 356 | const GLuint program_handle = shader->GetHandle(); | 374 | const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0; |
| 357 | switch (program) { | 375 | switch (program) { |
| 358 | case Maxwell::ShaderProgram::VertexA: | 376 | case Maxwell::ShaderProgram::VertexA: |
| 359 | case Maxwell::ShaderProgram::VertexB: | 377 | case Maxwell::ShaderProgram::VertexB: |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4f082592f..a95646936 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 33 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/renderer_opengl/utils.h" | 35 | #include "video_core/renderer_opengl/utils.h" |
| 36 | #include "video_core/shader/async_shaders.h" | ||
| 36 | #include "video_core/textures/texture.h" | 37 | #include "video_core/textures/texture.h" |
| 37 | 38 | ||
| 38 | namespace Core { | 39 | namespace Core { |
| @@ -91,6 +92,14 @@ public: | |||
| 91 | return num_queued_commands > 0; | 92 | return num_queued_commands > 0; |
| 92 | } | 93 | } |
| 93 | 94 | ||
| 95 | VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { | ||
| 96 | return async_shaders; | ||
| 97 | } | ||
| 98 | |||
| 99 | const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { | ||
| 100 | return async_shaders; | ||
| 101 | } | ||
| 102 | |||
| 94 | private: | 103 | private: |
| 95 | /// Configures the color and depth framebuffer states. | 104 | /// Configures the color and depth framebuffer states. |
| 96 | void ConfigureFramebuffers(); | 105 | void ConfigureFramebuffers(); |
| @@ -242,6 +251,7 @@ private: | |||
| 242 | ScreenInfo& screen_info; | 251 | ScreenInfo& screen_info; |
| 243 | ProgramManager& program_manager; | 252 | ProgramManager& program_manager; |
| 244 | StateTracker& state_tracker; | 253 | StateTracker& state_tracker; |
| 254 | VideoCommon::Shader::AsyncShaders async_shaders; | ||
| 245 | 255 | ||
| 246 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 256 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 247 | 257 | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f8b322227..b05cb641c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -177,6 +177,12 @@ public: | |||
| 177 | Release(); | 177 | Release(); |
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | OGLAssemblyProgram& operator=(OGLAssemblyProgram&& o) noexcept { | ||
| 181 | Release(); | ||
| 182 | handle = std::exchange(o.handle, 0); | ||
| 183 | return *this; | ||
| 184 | } | ||
| 185 | |||
| 180 | /// Deletes the internal OpenGL resource | 186 | /// Deletes the internal OpenGL resource |
| 181 | void Release(); | 187 | void Release(); |
| 182 | 188 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c6a3bf3a1..f469ed656 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include "video_core/shader/registry.h" | 31 | #include "video_core/shader/registry.h" |
| 32 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
| 33 | #include "video_core/shader_cache.h" | 33 | #include "video_core/shader_cache.h" |
| 34 | #include "video_core/shader_notify.h" | ||
| 34 | 35 | ||
| 35 | namespace OpenGL { | 36 | namespace OpenGL { |
| 36 | 37 | ||
| @@ -140,9 +141,24 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | |||
| 140 | return registry; | 141 | return registry; |
| 141 | } | 142 | } |
| 142 | 143 | ||
| 144 | std::unordered_set<GLenum> GetSupportedFormats() { | ||
| 145 | GLint num_formats; | ||
| 146 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||
| 147 | |||
| 148 | std::vector<GLint> formats(num_formats); | ||
| 149 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||
| 150 | |||
| 151 | std::unordered_set<GLenum> supported_formats; | ||
| 152 | for (const GLint format : formats) { | ||
| 153 | supported_formats.insert(static_cast<GLenum>(format)); | ||
| 154 | } | ||
| 155 | return supported_formats; | ||
| 156 | } | ||
| 157 | |||
| 158 | } // Anonymous namespace | ||
| 159 | |||
| 143 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, | 160 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, |
| 144 | const ShaderIR& ir, const Registry& registry, | 161 | const ShaderIR& ir, const Registry& registry, bool hint_retrievable) { |
| 145 | bool hint_retrievable = false) { | ||
| 146 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | 162 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 147 | LOG_INFO(Render_OpenGL, "{}", shader_id); | 163 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 148 | 164 | ||
| @@ -181,30 +197,17 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u | |||
| 181 | return program; | 197 | return program; |
| 182 | } | 198 | } |
| 183 | 199 | ||
| 184 | std::unordered_set<GLenum> GetSupportedFormats() { | ||
| 185 | GLint num_formats; | ||
| 186 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||
| 187 | |||
| 188 | std::vector<GLint> formats(num_formats); | ||
| 189 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||
| 190 | |||
| 191 | std::unordered_set<GLenum> supported_formats; | ||
| 192 | for (const GLint format : formats) { | ||
| 193 | supported_formats.insert(static_cast<GLenum>(format)); | ||
| 194 | } | ||
| 195 | return supported_formats; | ||
| 196 | } | ||
| 197 | |||
| 198 | } // Anonymous namespace | ||
| 199 | |||
| 200 | Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, | 200 | Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, |
| 201 | ProgramSharedPtr program_) | 201 | ProgramSharedPtr program_, bool is_built) |
| 202 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { | 202 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)}, |
| 203 | is_built(is_built) { | ||
| 203 | handle = program->assembly_program.handle; | 204 | handle = program->assembly_program.handle; |
| 204 | if (handle == 0) { | 205 | if (handle == 0) { |
| 205 | handle = program->source_program.handle; | 206 | handle = program->source_program.handle; |
| 206 | } | 207 | } |
| 207 | ASSERT(handle != 0); | 208 | if (is_built) { |
| 209 | ASSERT(handle != 0); | ||
| 210 | } | ||
| 208 | } | 211 | } |
| 209 | 212 | ||
| 210 | Shader::~Shader() = default; | 213 | Shader::~Shader() = default; |
| @@ -214,42 +217,82 @@ GLuint Shader::GetHandle() const { | |||
| 214 | return handle; | 217 | return handle; |
| 215 | } | 218 | } |
| 216 | 219 | ||
| 217 | std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, | 220 | bool Shader::IsBuilt() const { |
| 218 | Maxwell::ShaderProgram program_type, | 221 | return is_built; |
| 219 | ProgramCode code, ProgramCode code_b) { | 222 | } |
| 223 | |||
| 224 | void Shader::AsyncOpenGLBuilt(OGLProgram new_program) { | ||
| 225 | program->source_program = std::move(new_program); | ||
| 226 | handle = program->source_program.handle; | ||
| 227 | is_built = true; | ||
| 228 | } | ||
| 229 | |||
| 230 | void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) { | ||
| 231 | program->assembly_program = std::move(new_program); | ||
| 232 | handle = program->assembly_program.handle; | ||
| 233 | is_built = true; | ||
| 234 | } | ||
| 235 | |||
| 236 | std::unique_ptr<Shader> Shader::CreateStageFromMemory( | ||
| 237 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, | ||
| 238 | ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { | ||
| 220 | const auto shader_type = GetShaderType(program_type); | 239 | const auto shader_type = GetShaderType(program_type); |
| 221 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 240 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 222 | 241 | ||
| 223 | auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); | 242 | auto& gpu = params.system.GPU(); |
| 224 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | 243 | gpu.ShaderNotify().MarkSharderBuilding(); |
| 225 | // TODO(Rodrigo): Handle VertexA shaders | 244 | |
| 226 | // std::optional<ShaderIR> ir_b; | 245 | auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); |
| 227 | // if (!code_b.empty()) { | 246 | if (!async_shaders.IsShaderAsync(params.system.GPU()) || |
| 228 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 247 | !params.device.UseAsynchronousShaders()) { |
| 229 | // } | 248 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 230 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | 249 | // TODO(Rodrigo): Handle VertexA shaders |
| 250 | // std::optional<ShaderIR> ir_b; | ||
| 251 | // if (!code_b.empty()) { | ||
| 252 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | ||
| 253 | // } | ||
| 254 | auto program = | ||
| 255 | BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||
| 256 | ShaderDiskCacheEntry entry; | ||
| 257 | entry.type = shader_type; | ||
| 258 | entry.code = std::move(code); | ||
| 259 | entry.code_b = std::move(code_b); | ||
| 260 | entry.unique_identifier = params.unique_identifier; | ||
| 261 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 262 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 263 | entry.keys = registry->GetKeys(); | ||
| 264 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 265 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 266 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 267 | |||
| 268 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 269 | |||
| 270 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | ||
| 271 | MakeEntries(params.device, ir, shader_type), | ||
| 272 | std::move(program), true)); | ||
| 273 | } else { | ||
| 274 | // Required for entries | ||
| 275 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||
| 276 | auto entries = MakeEntries(params.device, ir, shader_type); | ||
| 231 | 277 | ||
| 232 | ShaderDiskCacheEntry entry; | 278 | async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier, |
| 233 | entry.type = shader_type; | 279 | std::move(code), std::move(code_b), STAGE_MAIN_OFFSET, |
| 234 | entry.code = std::move(code); | 280 | COMPILER_SETTINGS, *registry, cpu_addr); |
| 235 | entry.code_b = std::move(code_b); | ||
| 236 | entry.unique_identifier = params.unique_identifier; | ||
| 237 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 238 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 239 | entry.keys = registry->GetKeys(); | ||
| 240 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 241 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 242 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 243 | 281 | ||
| 244 | return std::unique_ptr<Shader>(new Shader( | 282 | auto program = std::make_shared<ProgramHandle>(); |
| 245 | std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); | 283 | return std::unique_ptr<Shader>( |
| 284 | new Shader(std::move(registry), std::move(entries), std::move(program), false)); | ||
| 285 | } | ||
| 246 | } | 286 | } |
| 247 | 287 | ||
| 248 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, | 288 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, |
| 249 | ProgramCode code) { | 289 | ProgramCode code) { |
| 250 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 290 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 251 | 291 | ||
| 252 | auto& engine = params.system.GPU().KeplerCompute(); | 292 | auto& gpu = params.system.GPU(); |
| 293 | gpu.ShaderNotify().MarkSharderBuilding(); | ||
| 294 | |||
| 295 | auto& engine = gpu.KeplerCompute(); | ||
| 253 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); | 296 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |
| 254 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | 297 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 255 | const u64 uid = params.unique_identifier; | 298 | const u64 uid = params.unique_identifier; |
| @@ -266,6 +309,8 @@ std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& p | |||
| 266 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 309 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 267 | params.disk_cache.SaveEntry(std::move(entry)); | 310 | params.disk_cache.SaveEntry(std::move(entry)); |
| 268 | 311 | ||
| 312 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 313 | |||
| 269 | return std::unique_ptr<Shader>(new Shader(std::move(registry), | 314 | return std::unique_ptr<Shader>(new Shader(std::move(registry), |
| 270 | MakeEntries(params.device, ir, ShaderType::Compute), | 315 | MakeEntries(params.device, ir, ShaderType::Compute), |
| 271 | std::move(program))); | 316 | std::move(program))); |
| @@ -436,14 +481,51 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 436 | return program; | 481 | return program; |
| 437 | } | 482 | } |
| 438 | 483 | ||
| 439 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 484 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, |
| 485 | VideoCommon::Shader::AsyncShaders& async_shaders) { | ||
| 440 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { | 486 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { |
| 441 | return last_shaders[static_cast<std::size_t>(program)]; | 487 | auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; |
| 488 | if (last_shader->IsBuilt()) { | ||
| 489 | return last_shader; | ||
| 490 | } | ||
| 442 | } | 491 | } |
| 443 | 492 | ||
| 444 | auto& memory_manager{system.GPU().MemoryManager()}; | 493 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 445 | const GPUVAddr address{GetShaderAddress(system, program)}; | 494 | const GPUVAddr address{GetShaderAddress(system, program)}; |
| 446 | 495 | ||
| 496 | if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { | ||
| 497 | auto completed_work = async_shaders.GetCompletedWork(); | ||
| 498 | for (auto& work : completed_work) { | ||
| 499 | Shader* shader = TryGet(work.cpu_address); | ||
| 500 | auto& gpu = system.GPU(); | ||
| 501 | gpu.ShaderNotify().MarkShaderComplete(); | ||
| 502 | if (shader == nullptr) { | ||
| 503 | continue; | ||
| 504 | } | ||
| 505 | using namespace VideoCommon::Shader; | ||
| 506 | if (work.backend == AsyncShaders::Backend::OpenGL) { | ||
| 507 | shader->AsyncOpenGLBuilt(std::move(work.program.opengl)); | ||
| 508 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||
| 509 | shader->AsyncGLASMBuilt(std::move(work.program.glasm)); | ||
| 510 | } | ||
| 511 | |||
| 512 | ShaderDiskCacheEntry entry; | ||
| 513 | entry.type = work.shader_type; | ||
| 514 | entry.code = std::move(work.code); | ||
| 515 | entry.code_b = std::move(work.code_b); | ||
| 516 | entry.unique_identifier = work.uid; | ||
| 517 | |||
| 518 | auto& registry = shader->GetRegistry(); | ||
| 519 | |||
| 520 | entry.bound_buffer = registry.GetBoundBuffer(); | ||
| 521 | entry.graphics_info = registry.GetGraphicsInfo(); | ||
| 522 | entry.keys = registry.GetKeys(); | ||
| 523 | entry.bound_samplers = registry.GetBoundSamplers(); | ||
| 524 | entry.bindless_samplers = registry.GetBindlessSamplers(); | ||
| 525 | disk_cache.SaveEntry(std::move(entry)); | ||
| 526 | } | ||
| 527 | } | ||
| 528 | |||
| 447 | // Look up shader in the cache based on address | 529 | // Look up shader in the cache based on address |
| 448 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | 530 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 449 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { | 531 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { |
| @@ -471,7 +553,8 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 471 | std::unique_ptr<Shader> shader; | 553 | std::unique_ptr<Shader> shader; |
| 472 | const auto found = runtime_cache.find(unique_identifier); | 554 | const auto found = runtime_cache.find(unique_identifier); |
| 473 | if (found == runtime_cache.end()) { | 555 | if (found == runtime_cache.end()) { |
| 474 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); | 556 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b), |
| 557 | async_shaders, cpu_addr.value_or(0)); | ||
| 475 | } else { | 558 | } else { |
| 476 | shader = Shader::CreateFromCache(params, found->second); | 559 | shader = Shader::CreateFromCache(params, found->second); |
| 477 | } | 560 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 994aaeaf2..7528ac686 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -33,6 +33,10 @@ namespace Core::Frontend { | |||
| 33 | class EmuWindow; | 33 | class EmuWindow; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | namespace VideoCommon::Shader { | ||
| 37 | class AsyncShaders; | ||
| 38 | } | ||
| 39 | |||
| 36 | namespace OpenGL { | 40 | namespace OpenGL { |
| 37 | 41 | ||
| 38 | class Device; | 42 | class Device; |
| @@ -61,6 +65,11 @@ struct ShaderParameters { | |||
| 61 | u64 unique_identifier; | 65 | u64 unique_identifier; |
| 62 | }; | 66 | }; |
| 63 | 67 | ||
| 68 | ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 69 | u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir, | ||
| 70 | const VideoCommon::Shader::Registry& registry, | ||
| 71 | bool hint_retrievable = false); | ||
| 72 | |||
| 64 | class Shader final { | 73 | class Shader final { |
| 65 | public: | 74 | public: |
| 66 | ~Shader(); | 75 | ~Shader(); |
| @@ -68,15 +77,28 @@ public: | |||
| 68 | /// Gets the GL program handle for the shader | 77 | /// Gets the GL program handle for the shader |
| 69 | GLuint GetHandle() const; | 78 | GLuint GetHandle() const; |
| 70 | 79 | ||
| 80 | bool IsBuilt() const; | ||
| 81 | |||
| 71 | /// Gets the shader entries for the shader | 82 | /// Gets the shader entries for the shader |
| 72 | const ShaderEntries& GetEntries() const { | 83 | const ShaderEntries& GetEntries() const { |
| 73 | return entries; | 84 | return entries; |
| 74 | } | 85 | } |
| 75 | 86 | ||
| 76 | static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, | 87 | const VideoCommon::Shader::Registry& GetRegistry() const { |
| 77 | Maxwell::ShaderProgram program_type, | 88 | return *registry; |
| 78 | ProgramCode program_code, | 89 | } |
| 79 | ProgramCode program_code_b); | 90 | |
| 91 | /// Mark a OpenGL shader as built | ||
| 92 | void AsyncOpenGLBuilt(OGLProgram new_program); | ||
| 93 | |||
| 94 | /// Mark a GLASM shader as built | ||
| 95 | void AsyncGLASMBuilt(OGLAssemblyProgram new_program); | ||
| 96 | |||
| 97 | static std::unique_ptr<Shader> CreateStageFromMemory( | ||
| 98 | const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||
| 99 | ProgramCode program_code, ProgramCode program_code_b, | ||
| 100 | VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr); | ||
| 101 | |||
| 80 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | 102 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, |
| 81 | ProgramCode code); | 103 | ProgramCode code); |
| 82 | 104 | ||
| @@ -85,12 +107,13 @@ public: | |||
| 85 | 107 | ||
| 86 | private: | 108 | private: |
| 87 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, | 109 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, |
| 88 | ProgramSharedPtr program); | 110 | ProgramSharedPtr program, bool is_built = true); |
| 89 | 111 | ||
| 90 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 112 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 91 | ShaderEntries entries; | 113 | ShaderEntries entries; |
| 92 | ProgramSharedPtr program; | 114 | ProgramSharedPtr program; |
| 93 | GLuint handle = 0; | 115 | GLuint handle = 0; |
| 116 | bool is_built{}; | ||
| 94 | }; | 117 | }; |
| 95 | 118 | ||
| 96 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { | 119 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { |
| @@ -104,7 +127,8 @@ public: | |||
| 104 | const VideoCore::DiskResourceLoadCallback& callback); | 127 | const VideoCore::DiskResourceLoadCallback& callback); |
| 105 | 128 | ||
| 106 | /// Gets the current specified shader stage program | 129 | /// Gets the current specified shader stage program |
| 107 | Shader* GetStageProgram(Maxwell::ShaderProgram program); | 130 | Shader* GetStageProgram(Maxwell::ShaderProgram program, |
| 131 | VideoCommon::Shader::AsyncShaders& async_shaders); | ||
| 108 | 132 | ||
| 109 | /// Gets a compute kernel in the passed address | 133 | /// Gets a compute kernel in the passed address |
| 110 | Shader* GetComputeKernel(GPUVAddr code_addr); | 134 | Shader* GetComputeKernel(GPUVAddr code_addr); |
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..fb94ac2e7 --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <chrono> | ||
| 6 | #include "video_core/engines/maxwell_3d.h" | ||
| 7 | #include "video_core/renderer_base.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 9 | #include "video_core/shader/async_shaders.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
/// Remembers the emulator window; it is used later by AllocateWorkers() to create one shared
/// graphics context per worker thread.
AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}

/// Shuts the worker threads down through KillWorkers() when the object is destroyed.
AsyncShaders::~AsyncShaders() {
    KillWorkers();
}
| 16 | |||
| 17 | void AsyncShaders::AllocateWorkers(std::size_t num_workers) { | ||
| 18 | // If we're already have workers queued or don't want to queue workers, ignore | ||
| 19 | if (num_workers == worker_threads.size() || num_workers == 0) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | |||
| 23 | // If workers already exist, clear them | ||
| 24 | if (!worker_threads.empty()) { | ||
| 25 | FreeWorkers(); | ||
| 26 | } | ||
| 27 | |||
| 28 | // Create workers | ||
| 29 | for (std::size_t i = 0; i < num_workers; i++) { | ||
| 30 | context_list.push_back(emu_window.CreateSharedContext()); | ||
| 31 | worker_threads.push_back(std::move( | ||
| 32 | std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | |||
| 36 | void AsyncShaders::FreeWorkers() { | ||
| 37 | // Mark all threads to quit | ||
| 38 | is_thread_exiting.store(true); | ||
| 39 | for (auto& thread : worker_threads) { | ||
| 40 | thread.join(); | ||
| 41 | } | ||
| 42 | // Clear our shared contexts | ||
| 43 | context_list.clear(); | ||
| 44 | |||
| 45 | // Clear our worker threads | ||
| 46 | worker_threads.clear(); | ||
| 47 | } | ||
| 48 | |||
| 49 | void AsyncShaders::KillWorkers() { | ||
| 50 | is_thread_exiting.store(true); | ||
| 51 | for (auto& thread : worker_threads) { | ||
| 52 | thread.detach(); | ||
| 53 | } | ||
| 54 | // Clear our shared contexts | ||
| 55 | context_list.clear(); | ||
| 56 | |||
| 57 | // Clear our worker threads | ||
| 58 | worker_threads.clear(); | ||
| 59 | } | ||
| 60 | |||
| 61 | bool AsyncShaders::HasWorkQueued() { | ||
| 62 | std::shared_lock lock(queue_mutex); | ||
| 63 | return !pending_queue.empty(); | ||
| 64 | } | ||
| 65 | |||
| 66 | bool AsyncShaders::HasCompletedWork() { | ||
| 67 | std::shared_lock lock(completed_mutex); | ||
| 68 | return !finished_work.empty(); | ||
| 69 | } | ||
| 70 | |||
| 71 | bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { | ||
| 72 | const auto& regs = gpu.Maxwell3D().regs; | ||
| 73 | |||
| 74 | // If something is using depth, we can assume that games are not rendering anything which will | ||
| 75 | // be used one time. | ||
| 76 | if (regs.zeta_enable) { | ||
| 77 | return true; | ||
| 78 | } | ||
| 79 | |||
| 80 | // If games are using a small index count, we can assume these are full screen quads. Usually | ||
| 81 | // these shaders are only used once for building textures so we can assume they can't be built | ||
| 82 | // async | ||
| 83 | if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | |||
| 87 | return true; | ||
| 88 | } | ||
| 89 | |||
| 90 | std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { | ||
| 91 | std::vector<AsyncShaders::Result> results; | ||
| 92 | { | ||
| 93 | std::unique_lock lock(completed_mutex); | ||
| 94 | results.assign(std::make_move_iterator(finished_work.begin()), | ||
| 95 | std::make_move_iterator(finished_work.end())); | ||
| 96 | finished_work.clear(); | ||
| 97 | } | ||
| 98 | return results; | ||
| 99 | } | ||
| 100 | |||
| 101 | void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | ||
| 102 | Tegra::Engines::ShaderType shader_type, u64 uid, | ||
| 103 | std::vector<u64> code, std::vector<u64> code_b, | ||
| 104 | u32 main_offset, | ||
| 105 | VideoCommon::Shader::CompilerSettings compiler_settings, | ||
| 106 | const VideoCommon::Shader::Registry& registry, | ||
| 107 | VAddr cpu_addr) { | ||
| 108 | WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM | ||
| 109 | : AsyncShaders::Backend::OpenGL, | ||
| 110 | device, | ||
| 111 | shader_type, | ||
| 112 | uid, | ||
| 113 | std::move(code), | ||
| 114 | std::move(code_b), | ||
| 115 | main_offset, | ||
| 116 | compiler_settings, | ||
| 117 | registry, | ||
| 118 | cpu_addr}; | ||
| 119 | std::unique_lock lock(queue_mutex); | ||
| 120 | pending_queue.push_back(std::move(params)); | ||
| 121 | } | ||
| 122 | |||
| 123 | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||
| 124 | using namespace std::chrono_literals; | ||
| 125 | while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||
| 126 | // Partial lock to allow all threads to read at the same time | ||
| 127 | if (!HasWorkQueued()) { | ||
| 128 | continue; | ||
| 129 | } | ||
| 130 | // Complete lock for pulling workload | ||
| 131 | queue_mutex.lock(); | ||
| 132 | // Another thread beat us, just unlock and wait for the next load | ||
| 133 | if (pending_queue.empty()) { | ||
| 134 | queue_mutex.unlock(); | ||
| 135 | continue; | ||
| 136 | } | ||
| 137 | // Pull work from queue | ||
| 138 | WorkerParams work = std::move(pending_queue.front()); | ||
| 139 | pending_queue.pop_front(); | ||
| 140 | queue_mutex.unlock(); | ||
| 141 | |||
| 142 | if (work.backend == AsyncShaders::Backend::OpenGL || | ||
| 143 | work.backend == AsyncShaders::Backend::GLASM) { | ||
| 144 | const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); | ||
| 145 | const auto scope = context->Acquire(); | ||
| 146 | auto program = | ||
| 147 | OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); | ||
| 148 | Result result{}; | ||
| 149 | result.backend = work.backend; | ||
| 150 | result.cpu_address = work.cpu_address; | ||
| 151 | result.uid = work.uid; | ||
| 152 | result.code = std::move(work.code); | ||
| 153 | result.code_b = std::move(work.code_b); | ||
| 154 | result.shader_type = work.shader_type; | ||
| 155 | |||
| 156 | if (work.backend == AsyncShaders::Backend::OpenGL) { | ||
| 157 | result.program.opengl = std::move(program->source_program); | ||
| 158 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||
| 159 | result.program.glasm = std::move(program->assembly_program); | ||
| 160 | } | ||
| 161 | |||
| 162 | { | ||
| 163 | std::unique_lock complete_lock(completed_mutex); | ||
| 164 | finished_work.push_back(std::move(result)); | ||
| 165 | } | ||
| 166 | } | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..26bc38326 --- /dev/null +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -0,0 +1,107 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <deque> | ||
| 8 | #include <memory> | ||
| 9 | #include <shared_mutex> | ||
| 10 | #include <thread> | ||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 16 | |||
| 17 | namespace Core::Frontend { | ||
| 18 | class EmuWindow; | ||
| 19 | class GraphicsContext; | ||
| 20 | } // namespace Core::Frontend | ||
| 21 | |||
| 22 | namespace Tegra { | ||
| 23 | class GPU; | ||
| 24 | } | ||
| 25 | |||
| 26 | namespace VideoCommon::Shader { | ||
| 27 | |||
/// Builds shaders on worker threads. Jobs are queued with QueueOpenGLShader(), picked up by the
/// workers and the finished programs are collected with GetCompletedWork().
class AsyncShaders {
public:
    /// Backend a queued shader is built for
    enum class Backend {
        OpenGL,
        GLASM,
    };

    /// Program produced by a worker; only the member matching Result::backend is filled in
    struct ResultPrograms {
        OpenGL::OGLProgram opengl;
        OpenGL::OGLAssemblyProgram glasm;
    };

    /// Outcome of one finished job, returned by GetCompletedWork()
    struct Result {
        u64 uid;
        VAddr cpu_address;
        Backend backend;
        ResultPrograms program;
        std::vector<u64> code;
        std::vector<u64> code_b;
        Tegra::Engines::ShaderType shader_type;
    };

    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
    ~AsyncShaders();

    /// Start up shader worker threads. Does nothing when num_workers is zero or equal to the
    /// current number of workers.
    void AllocateWorkers(std::size_t num_workers);

    /// Signal all worker threads to exit, wait for them to finish and release their shared
    /// contexts
    void FreeWorkers();

    /// Signal all worker threads to exit and drop them together with their shared contexts
    void KillWorkers();

    /// Check our worker queue to see if we have any work queued already
    bool HasWorkQueued();

    /// Check to see if any shaders have actually been compiled
    bool HasCompletedWork();

    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
    /// build every shader async as some shaders are only built and executed once. We try to
    /// "guess" which shader would be used only once
    bool IsShaderAsync(const Tegra::GPU& gpu) const;

    /// Pulls completed compiled shaders
    std::vector<Result> GetCompletedWork();

    /// Queue a shader to be built by one of the worker threads. The backend (OpenGL or GLASM)
    /// is chosen from device.UseAssemblyShaders().
    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
                           VideoCommon::Shader::CompilerSettings compiler_settings,
                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);

private:
    /// Entry point of every worker thread; `context` is the shared context owned by context_list
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);

    /// Everything a worker needs to build one shader. The device and the registry are stored
    /// by value.
    struct WorkerParams {
        AsyncShaders::Backend backend;
        OpenGL::Device device;
        Tegra::Engines::ShaderType shader_type;
        u64 uid;
        std::vector<u64> code;
        std::vector<u64> code_b;
        u32 main_offset;
        VideoCommon::Shader::CompilerSettings compiler_settings;
        VideoCommon::Shader::Registry registry;
        VAddr cpu_address;
    };

    std::shared_mutex queue_mutex;         ///< Guards pending_queue
    std::shared_mutex completed_mutex;     ///< Guards finished_work
    std::atomic<bool> is_thread_exiting{}; ///< Polled by the workers; true asks them to exit
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
    std::vector<std::thread> worker_threads;
    std::deque<WorkerParams> pending_queue;          ///< Jobs not yet taken by a worker
    std::vector<AsyncShaders::Result> finished_work; ///< Built shaders not yet collected
    Core::Frontend::EmuWindow& emu_window;           ///< Source of the shared contexts
};
| 106 | |||
| 107 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp new file mode 100644 index 000000000..46fd0baae --- /dev/null +++ b/src/video_core/shader_notify.cpp | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/shader_notify.h" | ||
| 6 | |||
| 7 | using namespace std::chrono_literals; | ||
| 8 | |||
| 9 | namespace VideoCore { | ||
namespace {
// Elapsed time that has to be exceeded before GetShadersBuilding() refreshes its cached count
constexpr auto UPDATE_TICK = 32ms;
} // namespace
| 13 | |||
// All members carry default initializers, so the compiler-generated versions are sufficient
ShaderNotify::ShaderNotify() = default;
ShaderNotify::~ShaderNotify() = default;
| 16 | |||
| 17 | std::size_t ShaderNotify::GetShadersBuilding() { | ||
| 18 | const auto now = std::chrono::high_resolution_clock::now(); | ||
| 19 | const auto diff = now - last_update; | ||
| 20 | if (diff > UPDATE_TICK) { | ||
| 21 | std::shared_lock lock(mutex); | ||
| 22 | last_updated_count = accurate_count; | ||
| 23 | } | ||
| 24 | return last_updated_count; | ||
| 25 | } | ||
| 26 | |||
| 27 | std::size_t ShaderNotify::GetShadersBuildingAccurate() { | ||
| 28 | std::shared_lock lock(mutex); | ||
| 29 | return accurate_count; | ||
| 30 | } | ||
| 31 | |||
| 32 | void ShaderNotify::MarkShaderComplete() { | ||
| 33 | std::unique_lock lock(mutex); | ||
| 34 | accurate_count--; | ||
| 35 | } | ||
| 36 | |||
| 37 | void ShaderNotify::MarkSharderBuilding() { | ||
| 38 | std::unique_lock lock(mutex); | ||
| 39 | accurate_count++; | ||
| 40 | } | ||
| 41 | |||
| 42 | } // namespace VideoCore | ||
diff --git a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h new file mode 100644 index 000000000..a9c92d179 --- /dev/null +++ b/src/video_core/shader_notify.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <chrono> | ||
| 8 | #include <shared_mutex> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace VideoCore { | ||
/// Keeps count of the shaders that are currently being built. Builders report through
/// MarkSharderBuilding() / MarkShaderComplete(); readers query either the live value or a
/// cached, throttled one.
class ShaderNotify {
public:
    ShaderNotify();
    ~ShaderNotify();

    /// Returns the cached count, refreshing it from the live counter when enough time has
    /// passed since last_update
    std::size_t GetShadersBuilding();
    /// Returns the live count
    std::size_t GetShadersBuildingAccurate();

    /// Decrements the live count
    void MarkShaderComplete();
    /// Increments the live count
    /// NOTE(review): the name looks like a misspelling of "MarkShaderBuilding"; it is kept
    /// because it is part of the public interface.
    void MarkSharderBuilding();

private:
    std::size_t last_updated_count{}; ///< Value handed out by GetShadersBuilding()
    std::size_t accurate_count{};     ///< Live counter, modified under `mutex`
    std::shared_mutex mutex;
    std::chrono::high_resolution_clock::time_point last_update{};
};
| 29 | } // namespace VideoCore | ||