summaryrefslogtreecommitdiff
path: root/src/video_core/gpu_thread.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu_thread.cpp')
-rw-r--r--src/video_core/gpu_thread.cpp64
1 files changed, 47 insertions, 17 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index bf761abf2..7e490bcc3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/scope_exit.h"
7#include "common/thread.h" 8#include "common/thread.h"
8#include "core/core.h" 9#include "core/core.h"
9#include "core/frontend/emu_window.h" 10#include "core/frontend/emu_window.h"
@@ -18,9 +19,11 @@ namespace VideoCommon::GPUThread {
18/// Runs the GPU thread 19/// Runs the GPU thread
19static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, 20static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
20 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, 21 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
21 SynchState& state) { 22 SynchState& state, Tegra::CDmaPusher& cdma_pusher) {
22 std::string name = "yuzu:GPU"; 23 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str()); 24 MicroProfileOnThreadCreate(name.c_str());
25 SCOPE_EXIT({ MicroProfileOnThreadExit(); });
26
24 Common::SetCurrentThreadName(name.c_str()); 27 Common::SetCurrentThreadName(name.c_str());
25 Common::SetCurrentThreadPriority(Common::ThreadPriority::High); 28 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
26 system.RegisterHostThread(); 29 system.RegisterHostThread();
@@ -39,19 +42,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
39 CommandDataContainer next; 42 CommandDataContainer next;
40 while (state.is_running) { 43 while (state.is_running) {
41 next = state.queue.PopWait(); 44 next = state.queue.PopWait();
42 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 45 if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
43 dma_pusher.Push(std::move(submit_list->entries)); 46 dma_pusher.Push(std::move(submit_list->entries));
44 dma_pusher.DispatchCalls(); 47 dma_pusher.DispatchCalls();
45 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 48 } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) {
49 // NVDEC
50 cdma_pusher.Push(std::move(command_list->entries));
51 cdma_pusher.DispatchCalls();
52 } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
46 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 53 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
47 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { 54 } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
48 renderer.Rasterizer().ReleaseFences(); 55 renderer.Rasterizer().ReleaseFences();
49 } else if (std::holds_alternative<GPUTickCommand>(next.data)) { 56 } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
50 system.GPU().TickWork(); 57 system.GPU().TickWork();
51 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 58 } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
52 renderer.Rasterizer().FlushRegion(data->addr, data->size); 59 renderer.Rasterizer().FlushRegion(flush->addr, flush->size);
53 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 60 } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
54 renderer.Rasterizer().OnCPUWrite(data->addr, data->size); 61 renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size);
55 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 62 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
56 return; 63 return;
57 } else { 64 } else {
@@ -61,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
61 } 68 }
62} 69}
63 70
64ThreadManager::ThreadManager(Core::System& system) : system{system} {} 71ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
72 : system{system_}, is_async{is_async_} {}
65 73
66ThreadManager::~ThreadManager() { 74ThreadManager::~ThreadManager() {
67 if (!thread.joinable()) { 75 if (!thread.joinable()) {
@@ -75,33 +83,48 @@ ThreadManager::~ThreadManager() {
75 83
76void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 84void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
77 Core::Frontend::GraphicsContext& context, 85 Core::Frontend::GraphicsContext& context,
78 Tegra::DmaPusher& dma_pusher) { 86 Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) {
79 thread = std::thread{RunThread, std::ref(system), std::ref(renderer), 87 thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
80 std::ref(context), std::ref(dma_pusher), std::ref(state)}; 88 std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher));
81} 89}
82 90
83void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 91void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
84 PushCommand(SubmitListCommand(std::move(entries))); 92 PushCommand(SubmitListCommand(std::move(entries)));
85} 93}
86 94
95void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) {
96 PushCommand(SubmitChCommandEntries(std::move(entries)));
97}
98
87void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 99void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
88 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 100 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
89} 101}
90 102
91void ThreadManager::FlushRegion(VAddr addr, u64 size) { 103void ThreadManager::FlushRegion(VAddr addr, u64 size) {
92 if (!Settings::IsGPULevelHigh()) { 104 if (!is_async) {
105 // Always flush with synchronous GPU mode
93 PushCommand(FlushRegionCommand(addr, size)); 106 PushCommand(FlushRegionCommand(addr, size));
94 return; 107 return;
95 } 108 }
96 if (!Settings::IsGPULevelExtreme()) { 109
97 return; 110 // Asynchronous GPU mode
98 } 111 switch (Settings::values.gpu_accuracy.GetValue()) {
99 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { 112 case Settings::GPUAccuracy::Normal:
113 PushCommand(FlushRegionCommand(addr, size));
114 break;
115 case Settings::GPUAccuracy::High:
116 // TODO(bunnei): Is this right? Preserving existing behavior for now
117 break;
118 case Settings::GPUAccuracy::Extreme: {
100 auto& gpu = system.GPU(); 119 auto& gpu = system.GPU();
101 u64 fence = gpu.RequestFlush(addr, size); 120 u64 fence = gpu.RequestFlush(addr, size);
102 PushCommand(GPUTickCommand()); 121 PushCommand(GPUTickCommand());
103 while (fence > gpu.CurrentFlushRequestFence()) { 122 while (fence > gpu.CurrentFlushRequestFence()) {
104 } 123 }
124 break;
125 }
126 default:
127 UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
105 } 128 }
106} 129}
107 130
@@ -115,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
115} 138}
116 139
117void ThreadManager::WaitIdle() const { 140void ThreadManager::WaitIdle() const {
118 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { 141 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
142 system.IsPoweredOn()) {
119 } 143 }
120} 144}
121 145
@@ -126,6 +150,12 @@ void ThreadManager::OnCommandListEnd() {
126u64 ThreadManager::PushCommand(CommandData&& command_data) { 150u64 ThreadManager::PushCommand(CommandData&& command_data) {
127 const u64 fence{++state.last_fence}; 151 const u64 fence{++state.last_fence};
128 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 152 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
153
154 if (!is_async) {
155 // In synchronous GPU mode, block the caller until the command has executed
156 WaitIdle();
157 }
158
129 return fence; 159 return fence;
130} 160}
131 161