gpu: Use host address for caching instead of guest address.

author: bunnei 2019-02-18 20:58:32 -0500
committer: bunnei 2019-03-14 22:34:42 -0400
commit: 2eaf6c41a4686028c0abc84d1be6fd48a67cf49f (patch)
tree: 6ad0848c848aea68e637386cad5068e13c831b92 /src/video_core/gpu_thread.cpp
parent: Merge pull request #2233 from ReinUsesLisp/morton-cleanup (diff)
download: yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.gz
yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.xz
yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.zip
1 file changed, 41 insertions, 95 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c5bdd2a17..086b2f625 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,6 @@
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/frontend/scope_acquire_window_context.h"
-#include "core/settings.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@
 namespace VideoCommon::GPUThread {
-/// Executes a single GPU thread command
-static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
-                           Tegra::DmaPusher& dma_pusher) {
-    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
-        dma_pusher.Push(std::move(submit_list->entries));
-        dma_pusher.DispatchCalls();
-    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
-        renderer.SwapBuffers(data->framebuffer);
-    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
-        renderer.Rasterizer().FlushRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
-        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
-    } else {
-        UNREACHABLE();
-    }
-}
 /// Runs the GPU thread
 static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
                      SynchState& state) {
    MicroProfileOnThreadCreate("GpuThread");
-    auto WaitForWakeup = [&]() {
-        std::unique_lock<std::mutex> lock{state.signal_mutex};
-        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
-    };
    // Wait for first GPU command before acquiring the window context
-    WaitForWakeup();
+    state.WaitForCommands();
    // If emulation was stopped during disk shader loading, abort before trying to acquire context
    if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+    CommandDataContainer next;
    while (state.is_running) {
-        if (!state.is_running) {
+        state.WaitForCommands();
-            return;
+        while (!state.queue.Empty()) {
-        }
+            state.queue.Pop(next);
+            if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
-        {
+                dma_pusher.Push(std::move(submit_list->entries));
-            // Thread has been woken up, so make the previous write queue the next read queue
+                dma_pusher.DispatchCalls();
-            std::lock_guard<std::mutex> lock{state.signal_mutex};
+            } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
-            std::swap(state.push_queue, state.pop_queue);
+                state.DecrementFramesCounter();
-        }
+                renderer.SwapBuffers(std::move(data->framebuffer));
+            } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
-        // Execute all of the GPU commands
+                renderer.Rasterizer().FlushRegion(data->addr, data->size);
-        while (!state.pop_queue->empty()) {
+            } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+                renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
-            state.pop_queue->pop();
+            } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
+                return;
+            } else {
+                UNREACHABLE();
+            }
        }
-        state.UpdateIdleState();
-        // Signal that the GPU thread has finished processing commands
-        if (state.is_idle) {
-            state.idle_condition.notify_one();
-        }
-        // Wait for CPU thread to send more GPU commands
-        WaitForWakeup();
    }
 }
 ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
-                                                         std::ref(dma_pusher), std::ref(state)},
+                                                         std::ref(dma_pusher), std::ref(state)} {}
-      thread_id{thread.get_id()} {}
 ThreadManager::~ThreadManager() {
-    {
+    // Notify GPU thread that a shutdown is pending
-        // Notify GPU thread that a shutdown is pending
+    PushCommand(EndProcessingCommand());
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
-        state.is_running = false;
-    }
-    state.signal_condition.notify_one();
    thread.join();
 }
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    if (entries.empty()) {
+    PushCommand(SubmitListCommand(std::move(entries)));
-        return;
-    }
-    PushCommand(SubmitListCommand(std::move(entries)), false, false);
 }
 void ThreadManager::SwapBuffers(
    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+    state.IncrementFramesCounter();
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+    state.WaitForFrames();
 }
-void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
-    // Block the CPU when using accurate emulation
+    PushCommand(FlushRegionCommand(addr, size));
-    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
 }
-void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
-    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+    if (state.queue.Empty()) {
+        // It's quicker to invalidate a single region on the CPU if the queue is already empty
+        renderer.Rasterizer().InvalidateRegion(addr, size);
+    } else {
+        PushCommand(InvalidateRegionCommand(addr, size));
+    }
 }
-void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+    // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
    InvalidateRegion(addr, size);
 }
-void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+void ThreadManager::PushCommand(CommandData&& command_data) {
-    {
+    state.queue.Push(CommandDataContainer(std::move(command_data)));
-        std::lock_guard<std::mutex> lock{state.signal_mutex};
+    state.SignalCommands();
-        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
-            // Execute the command synchronously on the current thread
-            ExecuteCommand(&command_data, renderer, dma_pusher);
-            return;
-        }
-        // Push the command to the GPU thread
-        state.UpdateIdleState();
-        state.push_queue->emplace(command_data);
-    }
-    // Signal the GPU thread that commands are pending
-    state.signal_condition.notify_one();
-    if (wait_for_idle) {
-        // Wait for the GPU to be idle (all commands to be executed)
-        std::unique_lock<std::mutex> lock{state.idle_mutex};
-        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
-    }
 }
 } // namespace VideoCommon::GPUThread
author	bunnei	2019-02-18 20:58:32 -0500
committer	bunnei	2019-03-14 22:34:42 -0400
commit	2eaf6c41a4686028c0abc84d1be6fd48a67cf49f (patch)
tree	6ad0848c848aea68e637386cad5068e13c831b92 /src/video_core/gpu_thread.cpp
parent	Merge pull request #2233 from ReinUsesLisp/morton-cleanup (diff)
download	yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.gz yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.xz yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.zip

diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c5bdd2a17..086b2f625 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,6 @@
5	#include "common/assert.h"	5	#include "common/assert.h"
6	#include "common/microprofile.h"	6	#include "common/microprofile.h"
7	#include "core/frontend/scope_acquire_window_context.h"	7	#include "core/frontend/scope_acquire_window_context.h"
8	#include "core/settings.h"
9	#include "video_core/dma_pusher.h"	8	#include "video_core/dma_pusher.h"
10	#include "video_core/gpu.h"	9	#include "video_core/gpu.h"
11	#include "video_core/gpu_thread.h"	10	#include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@
13		12
14	namespace VideoCommon::GPUThread {	13	namespace VideoCommon::GPUThread {
15		14
16	/// Executes a single GPU thread command
17	static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
18	Tegra::DmaPusher& dma_pusher) {
19	if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
20	dma_pusher.Push(std::move(submit_list->entries));
21	dma_pusher.DispatchCalls();
22	} else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
23	renderer.SwapBuffers(data->framebuffer);
24	} else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
25	renderer.Rasterizer().FlushRegion(data->addr, data->size);
26	} else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
27	renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
28	} else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
29	renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
30	} else {
31	UNREACHABLE();
32	}
33	}
34
35	/// Runs the GPU thread	15	/// Runs the GPU thread
36	static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,	16	static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
37	SynchState& state) {	17	SynchState& state) {
38
39	MicroProfileOnThreadCreate("GpuThread");	18	MicroProfileOnThreadCreate("GpuThread");
40		19
41	auto WaitForWakeup = [&]() {
42	std::unique_lock<std::mutex> lock{state.signal_mutex};
43	state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
44	};
45
46	// Wait for first GPU command before acquiring the window context	20	// Wait for first GPU command before acquiring the window context
47	WaitForWakeup();	21	state.WaitForCommands();
48		22
49	// If emulation was stopped during disk shader loading, abort before trying to acquire context	23	// If emulation was stopped during disk shader loading, abort before trying to acquire context
50	if (!state.is_running) {	24	if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
53		27
54	Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};	28	Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
55		29
		30	CommandDataContainer next;
56	while (state.is_running) {	31	while (state.is_running) {
57	if (!state.is_running) {	32	state.WaitForCommands();
58	return;	33	while (!state.queue.Empty()) {
59	}	34	state.queue.Pop(next);
60		35	if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
61	{	36	dma_pusher.Push(std::move(submit_list->entries));
62	// Thread has been woken up, so make the previous write queue the next read queue	37	dma_pusher.DispatchCalls();
63	std::lock_guard<std::mutex> lock{state.signal_mutex};	38	} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
64	std::swap(state.push_queue, state.pop_queue);	39	state.DecrementFramesCounter();
65	}	40	renderer.SwapBuffers(std::move(data->framebuffer));
66		41	} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
67	// Execute all of the GPU commands	42	renderer.Rasterizer().FlushRegion(data->addr, data->size);
68	while (!state.pop_queue->empty()) {	43	} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
69	ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);	44	renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
70	state.pop_queue->pop();	45	} else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
		46	return;
		47	} else {
		48	UNREACHABLE();
		49	}
71	}	50	}
72
73	state.UpdateIdleState();
74
75	// Signal that the GPU thread has finished processing commands
76	if (state.is_idle) {
77	state.idle_condition.notify_one();
78	}
79
80	// Wait for CPU thread to send more GPU commands
81	WaitForWakeup();
82	}	51	}
83	}	52	}
84		53
85	ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)	54	ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
86	: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),	55	: renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
87	std::ref(dma_pusher), std::ref(state)},	56	std::ref(dma_pusher), std::ref(state)} {}
88	thread_id{thread.get_id()} {}
89		57
90	ThreadManager::~ThreadManager() {	58	ThreadManager::~ThreadManager() {
91	{	59	// Notify GPU thread that a shutdown is pending
92	// Notify GPU thread that a shutdown is pending	60	PushCommand(EndProcessingCommand());
93	std::lock_guard<std::mutex> lock{state.signal_mutex};
94	state.is_running = false;
95	}
96
97	state.signal_condition.notify_one();
98	thread.join();	61	thread.join();
99	}	62	}
100		63
101	void ThreadManager::SubmitList(Tegra::CommandList&& entries) {	64	void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
102	if (entries.empty()) {	65	PushCommand(SubmitListCommand(std::move(entries)));
103	return;
104	}
105
106	PushCommand(SubmitListCommand(std::move(entries)), false, false);
107	}	66	}
108		67
109	void ThreadManager::SwapBuffers(	68	void ThreadManager::SwapBuffers(
110	std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {	69	std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
111	PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);	70	state.IncrementFramesCounter();
		71	PushCommand(SwapBuffersCommand(std::move(framebuffer)));
		72	state.WaitForFrames();
112	}	73	}
113		74
114	void ThreadManager::FlushRegion(VAddr addr, u64 size) {	75	void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
115	// Block the CPU when using accurate emulation	76	PushCommand(FlushRegionCommand(addr, size));
116	PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
117	}	77	}
118		78
119	void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {	79	void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
120	PushCommand(InvalidateRegionCommand(addr, size), true, true);	80	if (state.queue.Empty()) {
		81	// It's quicker to invalidate a single region on the CPU if the queue is already empty
		82	renderer.Rasterizer().InvalidateRegion(addr, size);
		83	} else {
		84	PushCommand(InvalidateRegionCommand(addr, size));
		85	}
121	}	86	}
122		87
123	void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {	88	void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
		89	// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
124	InvalidateRegion(addr, size);	90	InvalidateRegion(addr, size);
125	}	91	}
126		92
127	void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {	93	void ThreadManager::PushCommand(CommandData&& command_data) {
128	{	94	state.queue.Push(CommandDataContainer(std::move(command_data)));
129	std::lock_guard<std::mutex> lock{state.signal_mutex};	95	state.SignalCommands();
130
131	if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
132	// Execute the command synchronously on the current thread
133	ExecuteCommand(&command_data, renderer, dma_pusher);
134	return;
135	}
136
137	// Push the command to the GPU thread
138	state.UpdateIdleState();
139	state.push_queue->emplace(command_data);
140	}
141
142	// Signal the GPU thread that commands are pending
143	state.signal_condition.notify_one();
144
145	if (wait_for_idle) {
146	// Wait for the GPU to be idle (all commands to be executed)
147	std::unique_lock<std::mutex> lock{state.idle_mutex};
148	state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
149	}
150	}	96	}
151		97
152	} // namespace VideoCommon::GPUThread	98	} // namespace VideoCommon::GPUThread