diff options
Diffstat (limited to 'src/video_core/gpu_thread.cpp')
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 64 |
1 files changed, 47 insertions, 17 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index bf761abf2..7e490bcc3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "common/scope_exit.h" | ||
| 7 | #include "common/thread.h" | 8 | #include "common/thread.h" |
| 8 | #include "core/core.h" | 9 | #include "core/core.h" |
| 9 | #include "core/frontend/emu_window.h" | 10 | #include "core/frontend/emu_window.h" |
| @@ -18,9 +19,11 @@ namespace VideoCommon::GPUThread { | |||
| 18 | /// Runs the GPU thread | 19 | /// Runs the GPU thread |
| 19 | static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | 20 | static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, |
| 20 | Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, | 21 | Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, |
| 21 | SynchState& state) { | 22 | SynchState& state, Tegra::CDmaPusher& cdma_pusher) { |
| 22 | std::string name = "yuzu:GPU"; | 23 | std::string name = "yuzu:GPU"; |
| 23 | MicroProfileOnThreadCreate(name.c_str()); | 24 | MicroProfileOnThreadCreate(name.c_str()); |
| 25 | SCOPE_EXIT({ MicroProfileOnThreadExit(); }); | ||
| 26 | |||
| 24 | Common::SetCurrentThreadName(name.c_str()); | 27 | Common::SetCurrentThreadName(name.c_str()); |
| 25 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | 28 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); |
| 26 | system.RegisterHostThread(); | 29 | system.RegisterHostThread(); |
| @@ -39,19 +42,23 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 39 | CommandDataContainer next; | 42 | CommandDataContainer next; |
| 40 | while (state.is_running) { | 43 | while (state.is_running) { |
| 41 | next = state.queue.PopWait(); | 44 | next = state.queue.PopWait(); |
| 42 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | 45 | if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { |
| 43 | dma_pusher.Push(std::move(submit_list->entries)); | 46 | dma_pusher.Push(std::move(submit_list->entries)); |
| 44 | dma_pusher.DispatchCalls(); | 47 | dma_pusher.DispatchCalls(); |
| 45 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 48 | } else if (auto* command_list = std::get_if<SubmitChCommandEntries>(&next.data)) { |
| 49 | // NVDEC | ||
| 50 | cdma_pusher.Push(std::move(command_list->entries)); | ||
| 51 | cdma_pusher.DispatchCalls(); | ||
| 52 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||
| 46 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 53 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 47 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { | 54 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { |
| 48 | renderer.Rasterizer().ReleaseFences(); | 55 | renderer.Rasterizer().ReleaseFences(); |
| 49 | } else if (std::holds_alternative<GPUTickCommand>(next.data)) { | 56 | } else if (std::holds_alternative<GPUTickCommand>(next.data)) { |
| 50 | system.GPU().TickWork(); | 57 | system.GPU().TickWork(); |
| 51 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 58 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { |
| 52 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 59 | renderer.Rasterizer().FlushRegion(flush->addr, flush->size); |
| 53 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 60 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 54 | renderer.Rasterizer().OnCPUWrite(data->addr, data->size); | 61 | renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); |
| 55 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 62 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 56 | return; | 63 | return; |
| 57 | } else { | 64 | } else { |
| @@ -61,7 +68,8 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 61 | } | 68 | } |
| 62 | } | 69 | } |
| 63 | 70 | ||
/// Constructs the manager. The post-change (right-hand) constructor stores the system
/// reference in `system` and the new `is_async_` flag in `is_async`.
| 64 | ThreadManager::ThreadManager(Core::System& system) : system{system} {} | 71 | ThreadManager::ThreadManager(Core::System& system_, bool is_async_) |
| 72 | : system{system_}, is_async{is_async_} {} | ||
| 65 | 73 | ||
| 66 | ThreadManager::~ThreadManager() { | 74 | ThreadManager::~ThreadManager() { |
| 67 | if (!thread.joinable()) { | 75 | if (!thread.joinable()) { |
| @@ -75,33 +83,48 @@ ThreadManager::~ThreadManager() { | |||
| 75 | 83 | ||
/// Starts the worker thread by assigning a new std::thread that runs RunThread.
/// Every argument, including the cdma_pusher parameter added by this change, is wrapped
/// in std::ref, so RunThread receives references to the caller's objects, not copies.
// NOTE(review): the referenced objects presumably have to outlive the thread; their
// lifetimes are not visible in this diff - confirm against the caller.
| 76 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 84 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 77 | Core::Frontend::GraphicsContext& context, | 85 | Core::Frontend::GraphicsContext& context, |
| 78 | Tegra::DmaPusher& dma_pusher) { | 86 | Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { |
| 79 | thread = std::thread{RunThread, std::ref(system), std::ref(renderer), | 87 | thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), |
| 80 | std::ref(context), std::ref(dma_pusher), std::ref(state)}; | 88 | std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); |
| 81 | } | 89 | } |
| 82 | 90 | ||
/// Queues a SubmitListCommand built from `entries`; the list is moved into the command,
/// so the caller's object is left in a moved-from state.
/// In RunThread this command type is handed to dma_pusher.Push() followed by DispatchCalls().
| 83 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 91 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 84 | PushCommand(SubmitListCommand(std::move(entries))); | 92 | PushCommand(SubmitListCommand(std::move(entries))); |
| 85 | } | 93 | } |
| 86 | 94 | ||
/// Added by this change: queues a SubmitChCommandEntries command built from `entries`
/// (moved into the command). In RunThread this command type is handed to
/// cdma_pusher.Push() followed by DispatchCalls(), on the branch commented "NVDEC".
| 95 | void ThreadManager::SubmitCommandBuffer(Tegra::ChCommandHeaderList&& entries) { | ||
| 96 | PushCommand(SubmitChCommandEntries(std::move(entries))); | ||
| 97 | } | ||
| 98 | |||
/// Queues a SwapBuffersCommand. A non-null `framebuffer` is dereferenced and copied into
/// a std::optional here, so the command carries its own copy of the config; a null
/// pointer is queued as std::nullopt. This function is unchanged by the diff.
| 87 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 99 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 88 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | 100 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); |
| 89 | } | 101 | } |
| 90 | 102 | ||
/// Requests a flush of the region [addr, addr + size).
///
/// Post-change (right-hand column) behaviour:
///  - !is_async: always queues a FlushRegionCommand and returns. PushCommand itself calls
///    WaitIdle() in this mode before returning.
///  - is_async: dispatches on Settings::values.gpu_accuracy:
///      Normal  - queues a FlushRegionCommand.
///      High    - does nothing (see the TODO on that case).
///      Extreme - calls gpu.RequestFlush(), queues a GPUTickCommand, then spins until
///                gpu.CurrentFlushRequestFence() has reached the returned fence.
///      other   - reports UNIMPLEMENTED_MSG.
///
/// Pre-change (left-hand column) the decision was made with Settings::IsGPULevelHigh() /
/// IsGPULevelExtreme() instead of is_async and the accuracy switch.
| 91 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 103 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { |
| 92 | if (!Settings::IsGPULevelHigh()) { | 104 | if (!is_async) { |
| 105 | // Always flush with synchronous GPU mode | ||
| 93 | PushCommand(FlushRegionCommand(addr, size)); | 106 | PushCommand(FlushRegionCommand(addr, size)); |
| 94 | return; | 107 | return; |
| 95 | } | 108 | } |
| 96 | if (!Settings::IsGPULevelExtreme()) { | 109 | |
| 97 | return; | 110 | // Asynchronous GPU mode |
| 98 | } | 111 | switch (Settings::values.gpu_accuracy.GetValue()) { |
// NOTE(review): the left-hand column guarded the Extreme path with
// Rasterizer().MustFlushRegion(addr, size); the right-hand column has no such check, so
// the Extreme path now always requests a flush - confirm this is intentional.
| 99 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | 112 | case Settings::GPUAccuracy::Normal: |
| 113 | PushCommand(FlushRegionCommand(addr, size)); | ||
| 114 | break; | ||
| 115 | case Settings::GPUAccuracy::High: | ||
| 116 | // TODO(bunnei): Is this right? Preserving existing behavior for now | ||
| 117 | break; | ||
| 118 | case Settings::GPUAccuracy::Extreme: { | ||
| 100 | auto& gpu = system.GPU(); | 119 | auto& gpu = system.GPU(); |
| 101 | u64 fence = gpu.RequestFlush(addr, size); | 120 | u64 fence = gpu.RequestFlush(addr, size); |
| 102 | PushCommand(GPUTickCommand()); | 121 | PushCommand(GPUTickCommand()); |
// Empty-bodied spin: the calling thread loops until the flush fence catches up.
// NOTE(review): unlike WaitIdle() in this same change, this loop has no
// system.IsPoweredOn() escape - confirm it cannot spin forever during shutdown.
| 103 | while (fence > gpu.CurrentFlushRequestFence()) { | 122 | while (fence > gpu.CurrentFlushRequestFence()) { |
| 104 | } | 123 | } |
| 124 | break; | ||
| 125 | } | ||
| 126 | default: | ||
| 127 | UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue()); | ||
| 105 | } | 128 | } |
| 106 | } | 129 | } |
| 107 | 130 | ||
| @@ -115,7 +138,8 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 115 | } | 138 | } |
| 116 | 139 | ||
/// Spins (empty loop body) while state.last_fence is greater than state.signaled_fence.
/// The post-change (right-hand) condition additionally stops spinning as soon as
/// system.IsPoweredOn() returns false.
// NOTE(review): signaled_fence is read with std::memory_order_relaxed, which by itself
// gives no ordering with other memory written by the signalling thread - confirm callers
// do not rely on observing that thread's side effects after this returns.
| 117 | void ThreadManager::WaitIdle() const { | 140 | void ThreadManager::WaitIdle() const { |
| 118 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | 141 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && |
| 142 | system.IsPoweredOn()) { | ||
| 119 | } | 143 | } |
| 120 | } | 144 | } |
| 121 | 145 | ||
| @@ -126,6 +150,12 @@ void ThreadManager::OnCommandListEnd() { | |||
/// Pre-increments state.last_fence, pushes `command_data` onto state.queue tagged with
/// that fence value, and returns the fence.
/// Post-change (right-hand column): when is_async is false, WaitIdle() is called before
/// returning. WaitIdle() compares against state.last_fence, i.e. the newest fence issued,
/// not only the fence of this command.
| 126 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 150 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 127 | const u64 fence{++state.last_fence}; | 151 | const u64 fence{++state.last_fence}; |
| 128 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 152 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 153 | |||
| 154 | if (!is_async) { | ||
| 155 | // In synchronous GPU mode, block the caller until the command has executed | ||
| 156 | WaitIdle(); | ||
| 157 | } | ||
| 158 | |||
| 129 | return fence; | 159 | return fence; |
| 130 | } | 160 | } |
| 131 | 161 | ||