 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp |   2
 src/core/memory.cpp                                  |   8
 src/video_core/CMakeLists.txt                        |   2
 src/video_core/engines/kepler_memory.cpp             |   2
 src/video_core/engines/maxwell_dma.cpp               |   4
 src/video_core/gpu.cpp                               |  44
 src/video_core/gpu.h                                 |  22
 src/video_core/gpu_thread.cpp                        | 154
 src/video_core/gpu_thread.h                          | 135
 9 files changed, 358 insertions(+), 15 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 466db7ccd..a34b9e753 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
     auto& gpu = system_instance.GPU();
     auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
     ASSERT(cpu_addr);
-    system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+    gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
 
     params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index ec279cef8..6591c45d2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
         const VAddr overlap_end = std::min(end, region_end);
         const VAddr overlap_size = overlap_end - overlap_start;
 
-        auto& rasterizer = system_instance.Renderer().Rasterizer();
+        auto& gpu = system_instance.GPU();
         switch (mode) {
         case FlushMode::Flush:
-            rasterizer.FlushRegion(overlap_start, overlap_size);
+            gpu.FlushRegion(overlap_start, overlap_size);
            break;
        case FlushMode::Invalidate:
-            rasterizer.InvalidateRegion(overlap_start, overlap_size);
+            gpu.InvalidateRegion(overlap_start, overlap_size);
            break;
        case FlushMode::FlushAndInvalidate:
-            rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size);
+            gpu.FlushAndInvalidateRegion(overlap_start, overlap_size);
            break;
        }
    };
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3e9d2b3be..3bb5d0ed7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,8 @@ add_library(video_core STATIC
     engines/shader_header.h
     gpu.cpp
     gpu.h
+    gpu_thread.cpp
+    gpu_thread.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
     // We do this before actually writing the new data because the destination address might contain
     // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));
 
     Memory::Write32(*dest_address, data);
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 0474c7ba3..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        rasterizer.FlushRegion(*source_cpu, src_size);
+        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);
 
         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
     };
 
     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
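The ordering in these two engine hunks (flush the copy's source first, invalidate its destination before writing) can be illustrated with a toy write-back cache. This is illustrative only: ToyCache and its std::map-backed memory are not from the codebase, and yuzu's real surface caches may treat dirty destination surfaces differently.

    #include <cstdint>
    #include <iostream>
    #include <map>

    // Toy write-back cache: Flush = write a dirty line back to memory and keep it,
    // Invalidate = drop the line so it can never be written back over newer data.
    struct ToyCache {
        std::map<std::uint64_t, int>& memory;
        std::map<std::uint64_t, int> lines; // addr -> cached (possibly dirty) value

        void FlushRegion(std::uint64_t addr) {
            if (const auto it = lines.find(addr); it != lines.end()) {
                memory[addr] = it->second;
            }
        }
        void InvalidateRegion(std::uint64_t addr) {
            lines.erase(addr);
        }
    };

    int main() {
        std::map<std::uint64_t, int> memory{{0x100, 1}, {0x200, 2}};
        ToyCache cache{memory};
        cache.lines[0x100] = 42; // dirty source surface
        cache.lines[0x200] = 7;  // stale destination surface

        cache.FlushRegion(0x100);      // memory at 0x100 now holds the latest data
        cache.InvalidateRegion(0x200); // stale line can no longer shadow the copy
        memory[0x200] = memory[0x100]; // the DMA copy itself

        std::cout << memory.at(0x200) << '\n'; // prints 42
    }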
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index b0f3310e5..0d7a052dd 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,12 +6,14 @@
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/kepler_memory.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
 #include "video_core/renderer_base.h"
 
 namespace Tegra {
@@ -37,6 +39,10 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren
     kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager);
     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager);
+
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread = std::make_unique<VideoCommon::GPUThread::ThreadManager>(renderer, *dma_pusher);
+    }
 }
 
 GPU::~GPU() = default;
@@ -66,13 +72,45 @@ const DmaPusher& GPU::DmaPusher() const {
 }
 
 void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    dma_pusher->Push(std::move(entries));
-    dma_pusher->DispatchCalls();
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->SubmitList(std::move(entries));
+    } else {
+        dma_pusher->Push(std::move(entries));
+        dma_pusher->DispatchCalls();
+    }
 }
 
 void GPU::SwapBuffers(
     std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-    renderer.SwapBuffers(std::move(framebuffer));
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->SwapBuffers(std::move(framebuffer));
+    } else {
+        renderer.SwapBuffers(std::move(framebuffer));
+    }
+}
+
+void GPU::FlushRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->FlushRegion(addr, size);
+    } else {
+        renderer.Rasterizer().FlushRegion(addr, size);
+    }
+}
+
+void GPU::InvalidateRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->InvalidateRegion(addr, size);
+    } else {
+        renderer.Rasterizer().InvalidateRegion(addr, size);
+    }
+}
+
+void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_asynchronous_gpu_emulation) {
+        gpu_thread->FlushAndInvalidateRegion(addr, size);
+    } else {
+        renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+    }
 }
 
 u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
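With the hunks above, Tegra::GPU becomes the single choke point that selects an execution strategy, so callers such as memory.cpp and nvhost_as_gpu.cpp stay mode-agnostic. A minimal sketch of that dispatch shape, assuming illustrative stand-ins (Facade for GPU, Worker for GPUThread::ThreadManager, use_async for the setting), not the codebase's own types:

    #include <iostream>
    #include <memory>

    struct Rasterizer { // stands in for renderer.Rasterizer()
        void FlushRegion(unsigned long addr, unsigned long size) {
            std::cout << "flush 0x" << std::hex << addr << "+" << size << '\n';
        }
    };

    struct Worker { // stands in for GPUThread::ThreadManager
        explicit Worker(Rasterizer& rasterizer) : rasterizer{rasterizer} {}
        void FlushRegion(unsigned long addr, unsigned long size) {
            rasterizer.FlushRegion(addr, size); // the real one enqueues a command instead
        }
        Rasterizer& rasterizer;
    };

    class Facade { // stands in for Tegra::GPU
    public:
        explicit Facade(bool use_async) : use_async{use_async} {
            if (use_async) {
                worker = std::make_unique<Worker>(rasterizer);
            }
        }
        void FlushRegion(unsigned long addr, unsigned long size) {
            if (use_async) {
                worker->FlushRegion(addr, size);
            } else {
                rasterizer.FlushRegion(addr, size);
            }
        }

    private:
        bool use_async;
        Rasterizer rasterizer;
        std::unique_ptr<Worker> worker;
    };

    int main() {
        Facade{true}.FlushRegion(0x1000, 0x100);  // routed through the worker
        Facade{false}.FlushRegion(0x2000, 0x100); // direct synchronous path
    }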
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 62649bd6e..3f3098bf1 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -19,6 +19,10 @@ namespace VideoCore {
 class RendererBase;
 } // namespace VideoCore
 
+namespace VideoCommon::GPUThread {
+class ThreadManager;
+} // namespace VideoCommon::GPUThread
+
 namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
@@ -200,7 +204,7 @@ public:
             std::array<u32, NUM_REGS> reg_array;
         };
     } regs{};
-
+
     /// Push GPU command entries to be processed
     void PushGPUEntries(Tegra::CommandList&& entries);
 
@@ -208,6 +212,15 @@ public:
     void SwapBuffers(
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
 
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
 private:
     void ProcessBindMethod(const MethodCall& method_call);
     void ProcessSemaphoreTriggerMethod();
@@ -216,17 +229,18 @@ private:
 
     /// Calls a GPU puller method.
     void CallPullerMethod(const MethodCall& method_call);
 
     /// Calls a GPU engine method.
     void CallEngineMethod(const MethodCall& method_call);
 
     /// Determines where the method should be executed.
     bool ExecuteMethodOnEngine(const MethodCall& method_call);
 
 private:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
+    std::unique_ptr<VideoCommon::GPUThread::ThreadManager> gpu_thread;
+
     VideoCore::RendererBase& renderer;
 
     /// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..22c4cca4d
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,154 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "core/settings.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Executes a single GPU thread command
+static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher) {
+    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
+        dma_pusher.Push(std::move(submit_list->entries));
+        dma_pusher.DispatchCalls();
+    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
+        renderer.SwapBuffers(data->framebuffer);
+    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
+        renderer.Rasterizer().FlushRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
+    } else {
+        UNREACHABLE();
+    }
+}
+
+/// Runs the GPU thread
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
+
+    MicroProfileOnThreadCreate("GpuThread");
+
+    auto WaitForWakeup = [&]() {
+        std::unique_lock<std::mutex> lock{state.signal_mutex};
+        state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; });
+    };
+
+    // Wait for first GPU command before acquiring the window context
+    WaitForWakeup();
+
+    // If emulation was stopped during disk shader loading, abort before trying to acquire context
+    if (!state.is_running) {
+        return;
+    }
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    while (state.is_running) {
+        if (!state.is_running) {
+            return;
+        }
+
+        {
+            // Thread has been woken up, so make the previous write queue the next read queue
+            std::lock_guard<std::mutex> lock{state.signal_mutex};
+            std::swap(state.push_queue, state.pop_queue);
+        }
+
+        // Execute all of the GPU commands
+        while (!state.pop_queue->empty()) {
+            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+            state.pop_queue->pop();
+        }
+
+        // Signal that the GPU thread has finished processing commands
+        if (state.IsIdle()) {
+            state.idle_condition.notify_one();
+        }
+
+        // Wait for CPU thread to send more GPU commands
+        WaitForWakeup();
+    }
+}
+
+ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
+                                                         std::ref(dma_pusher), std::ref(state)},
+      thread_id{thread.get_id()} {}
+
+ThreadManager::~ThreadManager() {
+    {
+        // Notify GPU thread that a shutdown is pending
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.is_running = false;
+    }
+
+    state.signal_condition.notify_one();
+    thread.join();
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+    if (entries.empty()) {
+        return;
+    }
+
+    PushCommand(SubmitListCommand(std::move(entries)), false, false);
+}
+
+void ThreadManager::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+}
+
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_accurate_gpu_emulation) {
+        PushCommand(FlushRegionCommand(addr, size), true, false);
+    }
+}
+
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+}
+
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    if (Settings::values.use_accurate_gpu_emulation) {
+        PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false);
+    } else {
+        InvalidateRegion(addr, size);
+    }
+}
+
+void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+    {
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+
+        if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) {
+            // Execute the command synchronously on the current thread
+            ExecuteCommand(&command_data, renderer, dma_pusher);
+            return;
+        }
+
+        // Push the command to the GPU thread
+        state.push_queue->emplace(command_data);
+    }
+
+    // Signal the GPU thread that commands are pending
+    state.signal_condition.notify_one();
+
+    if (wait_for_idle) {
+        // Wait for the GPU to be idle (all commands to be executed)
+        std::unique_lock<std::mutex> lock{state.idle_mutex};
+        state.idle_condition.wait(lock, [this] { return state.IsIdle(); });
+    }
+}
+
+} // namespace VideoCommon::GPUThread
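Two details of this file are easy to miss. PushCommand may run a command inline on the calling thread: when the command allows it (allow_on_cpu, set only by InvalidateRegion) and the GPU thread is idle, or when the caller already is the GPU thread, which would otherwise deadlock waiting on itself. And the flushes are gated on use_accurate_gpu_emulation: FlushRegion becomes a no-op and FlushAndInvalidateRegion degrades to a plain invalidate when accuracy is off. The signal/idle handshake itself reduces to the following self-contained sketch, simplified to one queue and one mutex (the real code double-buffers the queue and splits signal_mutex from idle_mutex):

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <thread>

    struct State {
        std::queue<int> queue;
        bool running = true;
        std::mutex mutex;
        std::condition_variable signal; // CPU -> GPU: work or shutdown pending
        std::condition_variable idle;   // GPU -> CPU: queue fully drained
    };

    void RunThread(State& state) {
        std::unique_lock lock{state.mutex};
        while (state.running) {
            state.signal.wait(lock, [&] { return !state.queue.empty() || !state.running; });
            while (!state.queue.empty()) {
                const int cmd = state.queue.front();
                lock.unlock(); // execute without holding the lock
                std::cout << "executed " << cmd << '\n';
                lock.lock();
                state.queue.pop(); // pop after executing, so idle waiters only
                                   // wake once the command has really finished
            }
            state.idle.notify_all();
        }
    }

    void PushCommand(State& state, int cmd, bool wait_for_idle) {
        {
            std::lock_guard lock{state.mutex};
            state.queue.push(cmd);
        }
        state.signal.notify_one();
        if (wait_for_idle) {
            std::unique_lock lock{state.mutex};
            state.idle.wait(lock, [&] { return state.queue.empty(); });
        }
    }

    int main() {
        State state;
        std::thread gpu{RunThread, std::ref(state)};
        PushCommand(state, 1, false); // fire and forget, like SubmitList
        PushCommand(state, 2, true);  // blocks until drained, like SwapBuffers
        {
            std::lock_guard lock{state.mutex};
            state.running = false;
        }
        state.signal.notify_one();
        gpu.join();
    }

Note the pop-after-execute order, mirroring the ExecuteCommand/pop pair in RunThread above: a command stays in the queue while it runs, so an idle waiter cannot wake before the work has actually finished.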
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..ad9f9462b
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,135 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon::GPUThread {
+
+/// Command to signal to the GPU thread that a command list is ready for processing
+struct SubmitListCommand final {
+    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+    Tegra::CommandList entries;
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending
+struct SwapBuffersCommand final {
+    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+        : framebuffer{std::move(framebuffer)} {}
+
+    std::optional<const Tegra::FramebufferConfig> framebuffer;
+};
+
+/// Command to signal to the GPU thread to flush a region
+struct FlushRegionCommand final {
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to invalidate a region
+struct InvalidateRegionCommand final {
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to flush and invalidate a region
+struct FlushAndInvalidateRegionCommand final {
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+        : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Struct used to synchronize the GPU thread
+struct SynchState final {
+    std::atomic<bool> is_running{true};
+    std::condition_variable signal_condition;
+    std::mutex signal_mutex;
+    std::condition_variable idle_condition;
+    std::mutex idle_mutex;
+
+    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
+    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
+    // empty. This allows for efficient thread-safe access, as it does not require any copies.
+
+    using CommandQueue = std::queue<CommandData>;
+    std::array<CommandQueue, 2> command_queues;
+    CommandQueue* push_queue{&command_queues[0]};
+    CommandQueue* pop_queue{&command_queues[1]};
+
+    /// Returns true if the GPU thread should be idle, meaning there are no commands to process
+    bool IsIdle() const {
+        return command_queues[0].empty() && command_queues[1].empty();
+    }
+};
+
+/// Class used to manage the GPU thread
+class ThreadManager final {
+public:
+    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    ~ThreadManager();
+
+    /// Push GPU command entries to be processed
+    void SubmitList(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
+    /// Waits the caller until the GPU thread is idle, used for synchronization
+    void WaitForIdle();
+
+private:
+    /// Pushes a command to be executed by the GPU thread
+    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
+
+    /// Returns true if this is called by the GPU thread
+    bool IsGpuThread() const {
+        return std::this_thread::get_id() == thread_id;
+    }
+
+private:
+    SynchState state;
+    std::thread thread;
+    std::thread::id thread_id;
+    VideoCore::RendererBase& renderer;
+    Tegra::DmaPusher& dma_pusher;
+};
+
+} // namespace VideoCommon::GPUThread
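The comment in SynchState describes a double-buffered queue: the producer only ever appends to push_queue and the consumer only drains pop_queue, so the lock is needed just for the pointer swap, never for the drain. A single-threaded sketch of that mechanism alone (DoubleBufferedQueue is an illustrative name, not a type from the codebase):

    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <utility>

    struct DoubleBufferedQueue {
        std::queue<int> queues[2];
        std::queue<int>* push_queue = &queues[0]; // producer side
        std::queue<int>* pop_queue = &queues[1];  // consumer side
        std::mutex mutex;

        void Push(int value) {
            std::lock_guard lock{mutex}; // producer touches only push_queue
            push_queue->push(value);
        }

        void Drain() {
            {
                // The swap is the only consumer operation that needs the lock;
                // afterwards pop_queue is private to the consumer.
                std::lock_guard lock{mutex};
                std::swap(push_queue, pop_queue);
            }
            while (!pop_queue->empty()) {
                std::cout << pop_queue->front() << '\n';
                pop_queue->pop();
            }
        }
    };

    int main() {
        DoubleBufferedQueue q;
        q.Push(1);
        q.Push(2);
        q.Drain(); // prints 1 then 2
    }

One further design note from this header: SwapBuffersCommand stores the framebuffer configuration by value (std::optional<const Tegra::FramebufferConfig>) rather than the reference_wrapper the public API accepts, so the config is copied before it crosses to the GPU thread and cannot dangle once the CPU thread moves on.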