Merge pull request #4273 from ogniK5377/async-shaders-prod

video_core: Add asynchronous shader decompilation and compilation
author: bunnei 2020-07-18 00:48:27 -0400
committer: GitHub 2020-07-18 00:48:27 -0400
commit: 90cbcaa44a3901a832556258b5b97d8d7de34ca9 (patch)
tree: 570ff95dae035757fb2831804aae4f4ca681d354 /src/video_core/shader
parent: Merge pull request #4364 from lioncash/desig5 (diff)
parent: Fix style issues (diff)
download: yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.gz
yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.xz
yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.zip
2 files changed, 290 insertions, 0 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..b7f66d7ee
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,181 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#include <chrono>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+namespace VideoCommon::Shader {
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
+/// Shuts the worker pool down through KillWorkers() when the object is destroyed.
+AsyncShaders::~AsyncShaders() {
+    KillWorkers();
+}
+/// Creates `num_workers` compiler threads, each started with its own shared graphics context.
+///
+/// The call is ignored when `num_workers` is zero or matches the current number of workers.
+/// If a pool of a different size exists, it is released with FreeWorkers() first.
+void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
+    // Nothing requested, or the pool already has the requested size
+    const bool nothing_to_do = num_workers == 0 || num_workers == worker_threads.size();
+    if (nothing_to_do) {
+        return;
+    }
+    // Replace an existing pool instead of growing or shrinking it in place
+    if (!worker_threads.empty()) {
+        FreeWorkers();
+    }
+    // One shared context and one thread per worker; the thread receives a non-owning pointer
+    // to the context that was just stored in context_list
+    for (std::size_t spawned = 0; spawned < num_workers; ++spawned) {
+        context_list.push_back(emu_window.CreateSharedContext());
+        Core::Frontend::GraphicsContext* const worker_context = context_list.back().get();
+        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this, worker_context);
+    }
+}
+/// Asks every worker thread to exit, waits for all of them and releases their contexts.
+///
+/// Entries still sitting in the pending queue are left untouched. After this call the pool is
+/// empty and may be rebuilt with AllocateWorkers().
+void AsyncShaders::FreeWorkers() {
+    {
+        // Raise the exit flag while holding the queue mutex. Workers evaluate their wait
+        // predicate under this mutex, so a worker either sees the flag or is already blocked
+        // inside wait() when the notification below is sent; the wake-up cannot be missed.
+        std::unique_lock lock{queue_mutex};
+        is_thread_exiting.store(true);
+    }
+    cv.notify_all();
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    // All workers have finished, so nothing uses the contexts or thread handles any more
+    context_list.clear();
+    worker_threads.clear();
+    // Lower the flag again. Left raised, workers started by a later AllocateWorkers() call
+    // would observe it on entry and return without compiling anything.
+    is_thread_exiting.store(false);
+}
+/// Stops all worker threads and releases their shared contexts. Run by the destructor.
+///
+/// Workers are joined instead of detached: every worker dereferences `this` (queue, mutexes,
+/// condition variable) and the raw context pointer it was started with, so a detached worker
+/// could keep using them after they have been destroyed. A worker that is in the middle of a
+/// build finishes that item before it observes the exit flag, so this call waits for it.
+/// The exit flag stays raised afterwards.
+void AsyncShaders::KillWorkers() {
+    {
+        // Set the flag under the queue mutex so a worker cannot miss the notification between
+        // checking its wait predicate and blocking
+        std::unique_lock lock{queue_mutex};
+        is_thread_exiting.store(true);
+    }
+    // Wake workers that are sleeping on an empty queue so they can see the flag and return
+    cv.notify_all();
+    for (auto& thread : worker_threads) {
+        if (thread.joinable()) {
+            thread.join();
+        }
+    }
+    // Clear our shared contexts
+    context_list.clear();
+    // Clear our worker threads
+    worker_threads.clear();
+}
+/// Tells whether the pending queue holds at least one entry. Takes no lock itself; the worker
+/// thread calls it while holding queue_mutex.
+bool AsyncShaders::HasWorkQueued() {
+    const bool queue_is_empty = pending_queue.empty();
+    return !queue_is_empty;
+}
+/// Tells whether at least one finished result is waiting to be collected.
+bool AsyncShaders::HasCompletedWork() {
+    // Read-only access, so a shared lock on the results mutex is enough
+    std::shared_lock guard{completed_mutex};
+    const bool nothing_finished = finished_work.empty();
+    return !nothing_finished;
+}
+/// Heuristic deciding from the current Maxwell3D registers whether a shader may be built
+/// asynchronously.
+///
+/// @return true when depth (zeta) is enabled, or when both the index count and the vertex
+///         count are greater than 6; false otherwise.
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+    const auto& regs = gpu.Maxwell3D().regs;
+    // Draws that use depth are assumed not to be one-off renders
+    if (regs.zeta_enable) {
+        return true;
+    }
+    // A small index or vertex count is assumed to be a full screen quad. Such shaders are
+    // usually used once to build a texture, so they are not built async.
+    const bool looks_like_quad = regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6;
+    return !looks_like_quad;
+}
+/// Hands every finished result to the caller and leaves the internal finished list empty.
+///
+/// @return The results accumulated since the previous call, in completion order; empty when
+///         nothing has finished.
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+    std::vector<AsyncShaders::Result> results;
+    {
+        std::unique_lock lock{completed_mutex};
+        // Exchange the buffers instead of moving element by element: this is constant time,
+        // so workers waiting to publish a result are blocked for as short as possible
+        results.swap(finished_work);
+    }
+    return results;
+}
+/// Packages the arguments into a work item, appends it to the pending queue and wakes one
+/// worker. The backend is GLASM when the device reports assembly shaders, OpenGL otherwise.
+/// `code` and `code_b` are moved into the work item; the remaining arguments are copied.
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+                                     Tegra::Engines::ShaderType shader_type, u64 uid,
+                                     std::vector<u64> code, std::vector<u64> code_b,
+                                     u32 main_offset,
+                                     VideoCommon::Shader::CompilerSettings compiler_settings,
+                                     const VideoCommon::Shader::Registry& registry,
+                                     VAddr cpu_addr) {
+    const AsyncShaders::Backend backend = device.UseAssemblyShaders()
+                                              ? AsyncShaders::Backend::GLASM
+                                              : AsyncShaders::Backend::OpenGL;
+    // Build the work item before taking the lock so the queue is held only for the push
+    WorkerParams work{backend,           device,      shader_type,       uid,      std::move(code),
+                      std::move(code_b), main_offset, compiler_settings, registry, cpu_addr};
+    std::unique_lock guard{queue_mutex};
+    pending_queue.push_back(std::move(work));
+    cv.notify_one();
+}
+/// Worker thread entry point. Repeatedly takes one entry from the pending queue, builds the
+/// shader for it and appends the outcome to the finished list, until the exit flag is raised.
+///
+/// @param context Graphics context this worker acquires around each shader build. The pointer
+///                is not owned here; it comes from context_list.
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+        std::unique_lock lock{queue_mutex};
+        // Sleep until there is work queued or the pool is being shut down
+        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+        if (is_thread_exiting) {
+            return;
+        }
+        // wait() only returns once the queue is non-empty or the exit flag is set, and the
+        // flag was just ruled out. queue_mutex is still held, so no other worker can have
+        // drained the queue in between; no further emptiness check is needed.
+        WorkerParams work = std::move(pending_queue.front());
+        pending_queue.pop_front();
+        // Release the queue before building so other workers can pull work meanwhile
+        lock.unlock();
+        if (work.backend == AsyncShaders::Backend::OpenGL ||
+            work.backend == AsyncShaders::Backend::GLASM) {
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+            // NOTE(review): `scope` presumably keeps the context acquired until it goes out of
+            // scope at the end of this block - confirm against GraphicsContext::Acquire()
+            const auto scope = context->Acquire();
+            auto program =
+                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+            // work.code is moved out only after `ir` has been used by BuildShader above
+            Result result{};
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
+            // Keep only the program object matching the backend the work was queued for
+            if (work.backend == AsyncShaders::Backend::OpenGL) {
+                result.program.opengl = std::move(program->source_program);
+            } else if (work.backend == AsyncShaders::Backend::GLASM) {
+                result.program.glasm = std::move(program->assembly_program);
+            }
+            {
+                // Publish the result for GetCompletedWork()
+                std::unique_lock complete_lock(completed_mutex);
+                finished_work.push_back(std::move(result));
+            }
+        }
+    }
+}
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..2f5ee94ad
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,109 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#pragma once
+#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <memory>
+#include <mutex>
+#include <shared_mutex>
+#include <thread>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+namespace Tegra {
+class GPU;
+}
+namespace VideoCommon::Shader {
+/// Pool of worker threads that build shaders off the calling thread. Work is queued with
+/// QueueOpenGLShader() and finished results are collected with GetCompletedWork().
+class AsyncShaders {
+public:
+    /// Kind of program a work item is built for. QueueOpenGLShader() selects GLASM when the
+    /// device reports assembly shaders and OpenGL otherwise.
+    enum class Backend {
+        OpenGL,
+        GLASM,
+    };
+    /// Program objects produced by a build. The worker fills in only the member that matches
+    /// the backend of the work item.
+    struct ResultPrograms {
+        OpenGL::OGLProgram opengl;
+        OpenGL::OGLAssemblyProgram glasm;
+    };
+    /// Outcome of one finished work item. Apart from `program`, the fields are carried over
+    /// from the values passed to QueueOpenGLShader().
+    struct Result {
+        u64 uid;
+        VAddr cpu_address;
+        Backend backend;
+        ResultPrograms program;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        Tegra::Engines::ShaderType shader_type;
+    };
+    /// @param emu_window Window used to create one shared graphics context per worker.
+    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+    /// Shuts the workers down through KillWorkers()
+    ~AsyncShaders();
+    /// Start up shader worker threads. Ignored when num_workers is zero or equals the current
+    /// number of workers; an existing pool of a different size is freed first
+    void AllocateWorkers(std::size_t num_workers);
+    /// Signal all worker threads to exit, join them and release their shared contexts. The
+    /// pending queue itself is not modified
+    void FreeWorkers();
+    /// Flag all worker threads to exit and drop them together with their shared contexts.
+    /// Called by the destructor
+    void KillWorkers();
+    /// Check to see if any shaders have actually been compiled
+    bool HasCompletedWork();
+    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
+    /// build every shader async as some shaders are only built and executed once. We try to
+    /// "guess" which shader would be used only once
+    bool IsShaderAsync(const Tegra::GPU& gpu) const;
+    /// Pulls completed compiled shaders and empties the internal list of finished work
+    std::vector<Result> GetCompletedWork();
+    /// Queue a shader to be built by a worker thread. `code` and `code_b` are taken by value
+    /// and moved into the work item; the device, compiler settings and registry are copied
+    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+                           VideoCommon::Shader::CompilerSettings compiler_settings,
+                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
+private:
+    /// Entry point of every worker thread; `context` is the shared context of that worker
+    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
+    /// Check our worker queue to see if we have any work queued already. Does not lock
+    bool HasWorkQueued();
+    /// Everything a worker needs to build one shader, stored by value in the pending queue
+    struct WorkerParams {
+        AsyncShaders::Backend backend;
+        OpenGL::Device device;
+        Tegra::Engines::ShaderType shader_type;
+        u64 uid;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        u32 main_offset;
+        VideoCommon::Shader::CompilerSettings compiler_settings;
+        VideoCommon::Shader::Registry registry;
+        VAddr cpu_address;
+    };
+    /// Wakes workers when work is queued or when they are asked to exit
+    std::condition_variable cv;
+    /// Held while pending_queue is modified and while workers wait on cv
+    std::mutex queue_mutex;
+    /// Guards finished_work (shared for HasCompletedWork, exclusive for writers)
+    std::shared_mutex completed_mutex;
+    /// Set to true to ask the worker threads to leave their loop
+    std::atomic<bool> is_thread_exiting{};
+    /// Shared contexts, one per worker; workers hold raw pointers into this list
+    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+    std::vector<std::thread> worker_threads;
+    /// Work items waiting for a worker, consumed front first
+    std::deque<WorkerParams> pending_queue;
+    /// Results produced by workers and not yet collected by GetCompletedWork()
+    std::vector<AsyncShaders::Result> finished_work;
+    Core::Frontend::EmuWindow& emu_window;
+};
+} // namespace VideoCommon::Shader
author	bunnei	2020-07-18 00:48:27 -0400
committer	GitHub	2020-07-18 00:48:27 -0400
commit	90cbcaa44a3901a832556258b5b97d8d7de34ca9 (patch)
tree	570ff95dae035757fb2831804aae4f4ca681d354 /src/video_core/shader
parent	Merge pull request #4364 from lioncash/desig5 (diff)
parent	Fix style issues (diff)
download	yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.gz yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.tar.xz yuzu-90cbcaa44a3901a832556258b5b97d8d7de34ca9.zip

diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp new file mode 100644 index 000000000..b7f66d7ee --- /dev/null +++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,181 @@
	1	// Copyright 2020 yuzu Emulator Project
	2	// Licensed under GPLv2 or any later version
	3	// Refer to the license.txt file included.
	4
	5	#include <chrono>
	6	#include <condition_variable>
	7	#include <mutex>
	8	#include <thread>
	9	#include <vector>
	10	#include "video_core/engines/maxwell_3d.h"
	11	#include "video_core/renderer_base.h"
	12	#include "video_core/renderer_opengl/gl_shader_cache.h"
	13	#include "video_core/shader/async_shaders.h"
	14
	15	namespace VideoCommon::Shader {
	16
	17	AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window(emu_window) {}
	18
	19	AsyncShaders::~AsyncShaders() {
	20	KillWorkers();
	21	}
	22
	23	void AsyncShaders::AllocateWorkers(std::size_t num_workers) {
	24	// If we already have the requested number of workers or don't want any workers, ignore
	25	if (num_workers == worker_threads.size() \|\| num_workers == 0) {
	26	return;
	27	}
	28
	29	// If workers already exist, clear them
	30	if (!worker_threads.empty()) {
	31	FreeWorkers();
	32	}
	33
	34	// Create workers
	35	for (std::size_t i = 0; i < num_workers; i++) {
	36	context_list.push_back(emu_window.CreateSharedContext());
	37	worker_threads.push_back(std::move(
	38	std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())));
	39	}
	40	}
	41
	42	void AsyncShaders::FreeWorkers() {
	43	// Mark all threads to quit
	44	is_thread_exiting.store(true);
	45	cv.notify_all();
	46	for (auto& thread : worker_threads) {
	47	thread.join();
	48	}
	49	// Clear our shared contexts
	50	context_list.clear();
	51
	52	// Clear our worker threads
	53	worker_threads.clear();
	54	}
	55
	56	void AsyncShaders::KillWorkers() {
	57	is_thread_exiting.store(true);
	58	for (auto& thread : worker_threads) {
	59	thread.detach();
	60	}
	61	// Clear our shared contexts
	62	context_list.clear();
	63
	64	// Clear our worker threads
	65	worker_threads.clear();
	66	}
	67
	68	bool AsyncShaders::HasWorkQueued() {
	69	return !pending_queue.empty();
	70	}
	71
	72	bool AsyncShaders::HasCompletedWork() {
	73	std::shared_lock lock{completed_mutex};
	74	return !finished_work.empty();
	75	}
	76
	77	bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
	78	const auto& regs = gpu.Maxwell3D().regs;
	79
	80	// If something is using depth, we can assume that games are not rendering anything which will
	81	// be used one time.
	82	if (regs.zeta_enable) {
	83	return true;
	84	}
	85
	86	// If games are using a small index count, we can assume these are full screen quads. Usually
	87	// these shaders are only used once for building textures so we can assume they can't be built
	88	// async
	89	if (regs.index_array.count <= 6 \|\| regs.vertex_buffer.count <= 6) {
	90	return false;
	91	}
	92
	93	return true;
	94	}
	95
	96	std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
	97	std::vector<AsyncShaders::Result> results;
	98	{
	99	std::unique_lock lock{completed_mutex};
	100	results.assign(std::make_move_iterator(finished_work.begin()),
	101	std::make_move_iterator(finished_work.end()));
	102	finished_work.clear();
	103	}
	104	return results;
	105	}
	106
	107	void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
	108	Tegra::Engines::ShaderType shader_type, u64 uid,
	109	std::vector<u64> code, std::vector<u64> code_b,
	110	u32 main_offset,
	111	VideoCommon::Shader::CompilerSettings compiler_settings,
	112	const VideoCommon::Shader::Registry& registry,
	113	VAddr cpu_addr) {
	114	WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
	115	: AsyncShaders::Backend::OpenGL,
	116	device,
	117	shader_type,
	118	uid,
	119	std::move(code),
	120	std::move(code_b),
	121	main_offset,
	122	compiler_settings,
	123	registry,
	124	cpu_addr};
	125	std::unique_lock lock(queue_mutex);
	126	pending_queue.push_back(std::move(params));
	127	cv.notify_one();
	128	}
	129
	130	void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
	131	using namespace std::chrono_literals;
	132	while (!is_thread_exiting.load(std::memory_order_relaxed)) {
	133	std::unique_lock lock{queue_mutex};
	134	cv.wait(lock, [this] { return HasWorkQueued() \|\| is_thread_exiting; });
	135	if (is_thread_exiting) {
	136	return;
	137	}
	138
	139	// Partial lock to allow all threads to read at the same time
	140	if (!HasWorkQueued()) {
	141	continue;
	142	}
	143	// Another thread beat us, just unlock and wait for the next load
	144	if (pending_queue.empty()) {
	145	continue;
	146	}
	147	// Pull work from queue
	148	WorkerParams work = std::move(pending_queue.front());
	149	pending_queue.pop_front();
	150
	151	lock.unlock();
	152
	153	if (work.backend == AsyncShaders::Backend::OpenGL \|\|
	154	work.backend == AsyncShaders::Backend::GLASM) {
	155	const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
	156	const auto scope = context->Acquire();
	157	auto program =
	158	OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
	159	Result result{};
	160	result.backend = work.backend;
	161	result.cpu_address = work.cpu_address;
	162	result.uid = work.uid;
	163	result.code = std::move(work.code);
	164	result.code_b = std::move(work.code_b);
	165	result.shader_type = work.shader_type;
	166
	167	if (work.backend == AsyncShaders::Backend::OpenGL) {
	168	result.program.opengl = std::move(program->source_program);
	169	} else if (work.backend == AsyncShaders::Backend::GLASM) {
	170	result.program.glasm = std::move(program->assembly_program);
	171	}
	172
	173	{
	174	std::unique_lock complete_lock(completed_mutex);
	175	finished_work.push_back(std::move(result));
	176	}
	177	}
	178	}
	179	}
	180
	181	} // namespace VideoCommon::Shader


diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h new file mode 100644 index 000000000..2f5ee94ad --- /dev/null +++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,109 @@
	1	// Copyright 2020 yuzu Emulator Project
	2	// Licensed under GPLv2 or any later version
	3	// Refer to the license.txt file included.
	4
	5	#pragma once
	6
	7	#include <condition_variable>
	8	#include <deque>
	9	#include <memory>
	10	#include <shared_mutex>
	11	#include <thread>
	12	#include "common/bit_field.h"
	13	#include "common/common_types.h"
	14	#include "video_core/renderer_opengl/gl_device.h"
	15	#include "video_core/renderer_opengl/gl_resource_manager.h"
	16	#include "video_core/renderer_opengl/gl_shader_decompiler.h"
	17
	18	namespace Core::Frontend {
	19	class EmuWindow;
	20	class GraphicsContext;
	21	} // namespace Core::Frontend
	22
	23	namespace Tegra {
	24	class GPU;
	25	}
	26
	27	namespace VideoCommon::Shader {
	28
	29	class AsyncShaders {
	30	public:
	31	enum class Backend {
	32	OpenGL,
	33	GLASM,
	34	};
	35
	36	struct ResultPrograms {
	37	OpenGL::OGLProgram opengl;
	38	OpenGL::OGLAssemblyProgram glasm;
	39	};
	40
	41	struct Result {
	42	u64 uid;
	43	VAddr cpu_address;
	44	Backend backend;
	45	ResultPrograms program;
	46	std::vector<u64> code;
	47	std::vector<u64> code_b;
	48	Tegra::Engines::ShaderType shader_type;
	49	};
	50
	51	explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
	52	~AsyncShaders();
	53
	54	/// Start up shader worker threads
	55	void AllocateWorkers(std::size_t num_workers);
	56
	57	/// Clear the shader queue and kill all worker threads
	58	void FreeWorkers();
	59
	60	// Force end all threads
	61	void KillWorkers();
	62
	63	/// Check to see if any shaders have actually been compiled
	64	bool HasCompletedWork();
	65
	66	/// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
	67	/// every shader async as some shaders are only built and executed once. We try to "guess" which
	68	/// shader would be used only once
	69	bool IsShaderAsync(const Tegra::GPU& gpu) const;
	70
	71	/// Pulls completed compiled shaders
	72	std::vector<Result> GetCompletedWork();
	73
	74	void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
	75	u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
	76	VideoCommon::Shader::CompilerSettings compiler_settings,
	77	const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
	78
	79	private:
	80	void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
	81
	82	/// Check our worker queue to see if we have any work queued already
	83	bool HasWorkQueued();
	84
	85	struct WorkerParams {
	86	AsyncShaders::Backend backend;
	87	OpenGL::Device device;
	88	Tegra::Engines::ShaderType shader_type;
	89	u64 uid;
	90	std::vector<u64> code;
	91	std::vector<u64> code_b;
	92	u32 main_offset;
	93	VideoCommon::Shader::CompilerSettings compiler_settings;
	94	VideoCommon::Shader::Registry registry;
	95	VAddr cpu_address;
	96	};
	97
	98	std::condition_variable cv;
	99	std::mutex queue_mutex;
	100	std::shared_mutex completed_mutex;
	101	std::atomic<bool> is_thread_exiting{};
	102	std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
	103	std::vector<std::thread> worker_threads;
	104	std::deque<WorkerParams> pending_queue;
	105	std::vector<AsyncShaders::Result> finished_work;
	106	Core::Frontend::EmuWindow& emu_window;
	107	};
	108
	109	} // namespace VideoCommon::Shader