diff options
| author | 2020-07-18 00:48:27 -0400 | |
|---|---|---|
| committer | 2020-07-18 00:48:27 -0400 | |
| commit | 90cbcaa44a3901a832556258b5b97d8d7de34ca9 (patch) | |
| tree | 570ff95dae035757fb2831804aae4f4ca681d354 /src/video_core/shader | |
| parent | Merge pull request #4364 from lioncash/desig5 (diff) | |
| parent | Fix style issues (diff) | |
| download | yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.gz yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.xz yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.zip | |
Merge pull request #4273 from ogniK5377/async-shaders-prod
video_core: Add asynchronous shader decompilation and compilation
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/async_shaders.cpp | 181 | ||||
| -rw-r--r-- | src/video_core/shader/async_shaders.h | 109 |
2 files changed, 290 insertions, 0 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..b7f66d7ee --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <chrono> | ||
| 6 | #include <condition_variable> | ||
| 7 | #include <mutex> | ||
| 8 | #include <thread> | ||
| 9 | #include <vector> | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/renderer_base.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 13 | #include "video_core/shader/async_shaders.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {} | ||
| 18 | |||
| 19 | AsyncShaders::~AsyncShaders() { | ||
| 20 | KillWorkers(); | ||
| 21 | } | ||
| 22 | |||
| 23 | void AsyncShaders::AllocateWorkers(std::size_t num_workers) { | ||
| 24 | // If we're already have workers queued or don't want to queue workers, ignore | ||
| 25 | if (num_workers == worker_threads.size() || num_workers == 0) { | ||
| 26 | return; | ||
| 27 | } | ||
| 28 | |||
| 29 | // If workers already exist, clear them | ||
| 30 | if (!worker_threads.empty()) { | ||
| 31 | FreeWorkers(); | ||
| 32 | } | ||
| 33 | |||
| 34 | // Create workers | ||
| 35 | for (std::size_t i = 0; i < num_workers; i++) { | ||
| 36 | context_list.push_back(emu_window.CreateSharedContext()); | ||
| 37 | worker_threads.push_back(std::move( | ||
| 38 | std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | void AsyncShaders::FreeWorkers() { | ||
| 43 | // Mark all threads to quit | ||
| 44 | is_thread_exiting.store(true); | ||
| 45 | cv.notify_all(); | ||
| 46 | for (auto& thread : worker_threads) { | ||
| 47 | thread.join(); | ||
| 48 | } | ||
| 49 | // Clear our shared contexts | ||
| 50 | context_list.clear(); | ||
| 51 | |||
| 52 | // Clear our worker threads | ||
| 53 | worker_threads.clear(); | ||
| 54 | } | ||
| 55 | |||
| 56 | void AsyncShaders::KillWorkers() { | ||
| 57 | is_thread_exiting.store(true); | ||
| 58 | for (auto& thread : worker_threads) { | ||
| 59 | thread.detach(); | ||
| 60 | } | ||
| 61 | // Clear our shared contexts | ||
| 62 | context_list.clear(); | ||
| 63 | |||
| 64 | // Clear our worker threads | ||
| 65 | worker_threads.clear(); | ||
| 66 | } | ||
| 67 | |||
| 68 | bool AsyncShaders::HasWorkQueued() { | ||
| 69 | return !pending_queue.empty(); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool AsyncShaders::HasCompletedWork() { | ||
| 73 | std::shared_lock lock{completed_mutex}; | ||
| 74 | return !finished_work.empty(); | ||
| 75 | } | ||
| 76 | |||
| 77 | bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { | ||
| 78 | const auto& regs = gpu.Maxwell3D().regs; | ||
| 79 | |||
| 80 | // If something is using depth, we can assume that games are not rendering anything which will | ||
| 81 | // be used one time. | ||
| 82 | if (regs.zeta_enable) { | ||
| 83 | return true; | ||
| 84 | } | ||
| 85 | |||
| 86 | // If games are using a small index count, we can assume these are full screen quads. Usually | ||
| 87 | // these shaders are only used once for building textures so we can assume they can't be built | ||
| 88 | // async | ||
| 89 | if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) { | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | return true; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { | ||
| 97 | std::vector<AsyncShaders::Result> results; | ||
| 98 | { | ||
| 99 | std::unique_lock lock{completed_mutex}; | ||
| 100 | results.assign(std::make_move_iterator(finished_work.begin()), | ||
| 101 | std::make_move_iterator(finished_work.end())); | ||
| 102 | finished_work.clear(); | ||
| 103 | } | ||
| 104 | return results; | ||
| 105 | } | ||
| 106 | |||
| 107 | void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, | ||
| 108 | Tegra::Engines::ShaderType shader_type, u64 uid, | ||
| 109 | std::vector<u64> code, std::vector<u64> code_b, | ||
| 110 | u32 main_offset, | ||
| 111 | VideoCommon::Shader::CompilerSettings compiler_settings, | ||
| 112 | const VideoCommon::Shader::Registry& registry, | ||
| 113 | VAddr cpu_addr) { | ||
| 114 | WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM | ||
| 115 | : AsyncShaders::Backend::OpenGL, | ||
| 116 | device, | ||
| 117 | shader_type, | ||
| 118 | uid, | ||
| 119 | std::move(code), | ||
| 120 | std::move(code_b), | ||
| 121 | main_offset, | ||
| 122 | compiler_settings, | ||
| 123 | registry, | ||
| 124 | cpu_addr}; | ||
| 125 | std::unique_lock lock(queue_mutex); | ||
| 126 | pending_queue.push_back(std::move(params)); | ||
| 127 | cv.notify_one(); | ||
| 128 | } | ||
| 129 | |||
| 130 | void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { | ||
| 131 | using namespace std::chrono_literals; | ||
| 132 | while (!is_thread_exiting.load(std::memory_order_relaxed)) { | ||
| 133 | std::unique_lock lock{queue_mutex}; | ||
| 134 | cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); | ||
| 135 | if (is_thread_exiting) { | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | |||
| 139 | // Partial lock to allow all threads to read at the same time | ||
| 140 | if (!HasWorkQueued()) { | ||
| 141 | continue; | ||
| 142 | } | ||
| 143 | // Another thread beat us, just unlock and wait for the next load | ||
| 144 | if (pending_queue.empty()) { | ||
| 145 | continue; | ||
| 146 | } | ||
| 147 | // Pull work from queue | ||
| 148 | WorkerParams work = std::move(pending_queue.front()); | ||
| 149 | pending_queue.pop_front(); | ||
| 150 | |||
| 151 | lock.unlock(); | ||
| 152 | |||
| 153 | if (work.backend == AsyncShaders::Backend::OpenGL || | ||
| 154 | work.backend == AsyncShaders::Backend::GLASM) { | ||
| 155 | const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); | ||
| 156 | const auto scope = context->Acquire(); | ||
| 157 | auto program = | ||
| 158 | OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); | ||
| 159 | Result result{}; | ||
| 160 | result.backend = work.backend; | ||
| 161 | result.cpu_address = work.cpu_address; | ||
| 162 | result.uid = work.uid; | ||
| 163 | result.code = std::move(work.code); | ||
| 164 | result.code_b = std::move(work.code_b); | ||
| 165 | result.shader_type = work.shader_type; | ||
| 166 | |||
| 167 | if (work.backend == AsyncShaders::Backend::OpenGL) { | ||
| 168 | result.program.opengl = std::move(program->source_program); | ||
| 169 | } else if (work.backend == AsyncShaders::Backend::GLASM) { | ||
| 170 | result.program.glasm = std::move(program->assembly_program); | ||
| 171 | } | ||
| 172 | |||
| 173 | { | ||
| 174 | std::unique_lock complete_lock(completed_mutex); | ||
| 175 | finished_work.push_back(std::move(result)); | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..2f5ee94ad --- /dev/null +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <condition_variable> | ||
| 8 | #include <deque> | ||
| 9 | #include <memory> | ||
| 10 | #include <shared_mutex> | ||
| 11 | #include <thread> | ||
| 12 | #include "common/bit_field.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 17 | |||
| 18 | namespace Core::Frontend { | ||
| 19 | class EmuWindow; | ||
| 20 | class GraphicsContext; | ||
| 21 | } // namespace Core::Frontend | ||
| 22 | |||
| 23 | namespace Tegra { | ||
| 24 | class GPU; | ||
| 25 | } | ||
| 26 | |||
| 27 | namespace VideoCommon::Shader { | ||
| 28 | |||
| 29 | class AsyncShaders { | ||
| 30 | public: | ||
| 31 | enum class Backend { | ||
| 32 | OpenGL, | ||
| 33 | GLASM, | ||
| 34 | }; | ||
| 35 | |||
| 36 | struct ResultPrograms { | ||
| 37 | OpenGL::OGLProgram opengl; | ||
| 38 | OpenGL::OGLAssemblyProgram glasm; | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct Result { | ||
| 42 | u64 uid; | ||
| 43 | VAddr cpu_address; | ||
| 44 | Backend backend; | ||
| 45 | ResultPrograms program; | ||
| 46 | std::vector<u64> code; | ||
| 47 | std::vector<u64> code_b; | ||
| 48 | Tegra::Engines::ShaderType shader_type; | ||
| 49 | }; | ||
| 50 | |||
| 51 | explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window); | ||
| 52 | ~AsyncShaders(); | ||
| 53 | |||
| 54 | /// Start up shader worker threads | ||
| 55 | void AllocateWorkers(std::size_t num_workers); | ||
| 56 | |||
| 57 | /// Clear the shader queue and kill all worker threads | ||
| 58 | void FreeWorkers(); | ||
| 59 | |||
| 60 | // Force end all threads | ||
| 61 | void KillWorkers(); | ||
| 62 | |||
| 63 | /// Check to see if any shaders have actually been compiled | ||
| 64 | bool HasCompletedWork(); | ||
| 65 | |||
| 66 | /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build | ||
| 67 | /// every shader async as some shaders are only built and executed once. We try to "guess" which | ||
| 68 | /// shader would be used only once | ||
| 69 | bool IsShaderAsync(const Tegra::GPU& gpu) const; | ||
| 70 | |||
| 71 | /// Pulls completed compiled shaders | ||
| 72 | std::vector<Result> GetCompletedWork(); | ||
| 73 | |||
| 74 | void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, | ||
| 75 | u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, | ||
| 76 | VideoCommon::Shader::CompilerSettings compiler_settings, | ||
| 77 | const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); | ||
| 78 | |||
| 79 | private: | ||
| 80 | void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); | ||
| 81 | |||
| 82 | /// Check our worker queue to see if we have any work queued already | ||
| 83 | bool HasWorkQueued(); | ||
| 84 | |||
| 85 | struct WorkerParams { | ||
| 86 | AsyncShaders::Backend backend; | ||
| 87 | OpenGL::Device device; | ||
| 88 | Tegra::Engines::ShaderType shader_type; | ||
| 89 | u64 uid; | ||
| 90 | std::vector<u64> code; | ||
| 91 | std::vector<u64> code_b; | ||
| 92 | u32 main_offset; | ||
| 93 | VideoCommon::Shader::CompilerSettings compiler_settings; | ||
| 94 | VideoCommon::Shader::Registry registry; | ||
| 95 | VAddr cpu_address; | ||
| 96 | }; | ||
| 97 | |||
| 98 | std::condition_variable cv; | ||
| 99 | std::mutex queue_mutex; | ||
| 100 | std::shared_mutex completed_mutex; | ||
| 101 | std::atomic<bool> is_thread_exiting{}; | ||
| 102 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; | ||
| 103 | std::vector<std::thread> worker_threads; | ||
| 104 | std::deque<WorkerParams> pending_queue; | ||
| 105 | std::vector<AsyncShaders::Result> finished_work; | ||
| 106 | Core::Frontend::EmuWindow& emu_window; | ||
| 107 | }; | ||
| 108 | |||
| 109 | } // namespace VideoCommon::Shader | ||