author    2019-02-18 20:58:32 -0500
committer 2019-03-14 22:34:42 -0400
commit    2eaf6c41a4686028c0abc84d1be6fd48a67cf49f
tree      6ad0848c848aea68e637386cad5068e13c831b92 /src/video_core/gpu_thread.h
parent    Merge pull request #2233 from ReinUsesLisp/morton-cleanup
gpu: Use host address for caching instead of guest address.
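This changes the key used by the rasterizer caches from the guest virtual address (VAddr) to the host address of the backing memory (CacheAddr, defined in video_core/gpu.h, which this diff newly includes). A minimal sketch of the idea, with simplified stand-ins for yuzu's actual definitions:

```cpp
#include <cstdint>

// Simplified stand-ins: yuzu's real definitions live in video_core/gpu.h
// and the emulated memory subsystem.
using VAddr = std::uint64_t;       // guest (emulated) virtual address
using CacheAddr = std::uintptr_t;  // host pointer value, used as the cache key

inline CacheAddr ToCacheAddr(const void* host_ptr) {
    return reinterpret_cast<CacheAddr>(host_ptr);
}

// Two guest mappings that alias the same host memory now hit the same
// cache entry, and cache lookups skip a guest-to-host translation.
```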
Diffstat (limited to 'src/video_core/gpu_thread.h')
-rw-r--r--   src/video_core/gpu_thread.h   132
1 file changed, 92 insertions(+), 40 deletions(-)
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..8cd7db1c6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -13,6 +13,9 @@
 #include <thread>
 #include <variant>
 
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
 namespace Tegra {
 struct FramebufferConfig;
 class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;
 
 namespace VideoCommon::GPUThread {
 
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
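EndProcessingCommand is a sentinel that travels through the same queue as real work, so shutdown is ordered after any commands submitted before it. A hypothetical sketch of the consumer-side check (the real loop lives in gpu_thread.cpp, which is not part of this diff, and uses the SynchState defined further down):

```cpp
// Hypothetical sketch of the GPU-thread loop's shutdown handling.
CommandDataContainer next;
while (state.queue.Pop(next)) {
    if (std::holds_alternative<EndProcessingCommand>(next.data)) {
        return; // Draining stops here; the GPU thread exits cleanly
    }
    // ... std::visit the remaining command types ...
}
```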
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
         : framebuffer{std::move(framebuffer)} {}
 
-    std::optional<const Tegra::FramebufferConfig> framebuffer;
+    std::optional<Tegra::FramebufferConfig> framebuffer;
 };
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+
+    CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
+
+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = std::move(t.data);
+        return *this;
+    }
+
+    CommandData data;
+};
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex frames_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable frames_condition;
 
-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
-    }
+    void IncrementFramesCounter() {
+        std::lock_guard<std::mutex> lock{frames_mutex};
+        ++queued_frame_count;
+    }
+
+    void DecrementFramesCounter() {
+        {
+            std::lock_guard<std::mutex> lock{frames_mutex};
+            --queued_frame_count;
+
+            if (queued_frame_count) {
+                return;
+            }
+        }
+        frames_condition.notify_one();
+    }
+
+    void WaitForFrames() {
+        {
+            std::lock_guard<std::mutex> lock{frames_mutex};
+            if (!queued_frame_count) {
+                return;
+            }
+        }
+
+        // Wait for the GPU to be idle (all commands to be executed)
+        {
+            std::unique_lock<std::mutex> lock{frames_mutex};
+            frames_condition.wait(lock, [this] { return !queued_frame_count; });
+        }
+    }
+
+    void SignalCommands() {
+        {
+            std::unique_lock<std::mutex> lock{commands_mutex};
+            if (queue.Empty()) {
+                return;
+            }
+        }
+
+        commands_condition.notify_one();
+    }
+
+    void WaitForCommands() {
+        std::unique_lock<std::mutex> lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
 };
 
 /// Class used to manage the GPU thread
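The rewritten SynchState drops the double-buffered std::queue pair in favor of a single-producer single-consumer Common::SPSCQueue and tracks in-flight frames explicitly. A hedged sketch of how the two sides might pair up, using only the method names from this diff (the actual call sites are in gpu_thread.cpp, which this diff does not show):

```cpp
// CPU (producer) side, e.g. when a swap is requested:
state.IncrementFramesCounter();  // one more frame in flight
state.queue.Push(CommandDataContainer{SwapBuffersCommand{framebuffer}});
state.SignalCommands();          // wake the GPU thread if it is waiting

// When the CPU must synchronize with presentation:
state.WaitForFrames();           // blocks until queued_frame_count reaches 0

// GPU (consumer) side, after executing a swap:
state.DecrementFramesCounter();  // notifies frames_condition at zero
```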
@@ -105,22 +162,17 @@ public:
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(VAddr addr, u64 size);
+    void FlushRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(VAddr addr, u64 size);
+    void InvalidateRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
-    /// Returns true if this is called by the GPU thread
-    bool IsGpuThread() const {
-        return std::this_thread::get_id() == thread_id;
-    }
+    void PushCommand(CommandData&& command_data);
 
 private:
     SynchState state;
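With the SPSC queue, PushCommand no longer needs the wait_for_idle/allow_on_cpu flags or the IsGpuThread() check: every command is simply queued and the consumer is signalled. A plausible sketch of the new body, assuming the implementation in gpu_thread.cpp (not shown in this diff) matches the new signature:

```cpp
void ThreadManager::PushCommand(CommandData&& command_data) {
    // Queue the command for the GPU thread and wake it if it is waiting.
    state.queue.Push(CommandDataContainer{std::move(command_data)});
    state.SignalCommands();
}
```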