Diffstat (limited to 'src/video_core')

| Mode       | Path                                             | Lines changed |
|------------|--------------------------------------------------|---------------|
| -rw-r--r-- | src/video_core/CMakeLists.txt                    | 6   |
| -rw-r--r-- | src/video_core/engines/kepler_memory.cpp         | 2   |
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp           | 4   |
| -rw-r--r-- | src/video_core/gpu.cpp                           | 5   |
| -rw-r--r-- | src/video_core/gpu.h                             | 55  |
| -rw-r--r-- | src/video_core/gpu_asynch.cpp                    | 37  |
| -rw-r--r-- | src/video_core/gpu_asynch.h                      | 37  |
| -rw-r--r-- | src/video_core/gpu_synch.cpp                     | 37  |
| -rw-r--r-- | src/video_core/gpu_synch.h                       | 29  |
| -rw-r--r-- | src/video_core/gpu_thread.cpp                    | 152 |
| -rw-r--r-- | src/video_core/gpu_thread.h                      | 136 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6   |

12 files changed, 480 insertions(+), 26 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c1ae83f4d..57f31cd58 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -17,6 +17,12 @@ add_library(video_core STATIC
     engines/shader_header.h
     gpu.cpp
     gpu.h
+    gpu_asynch.cpp
+    gpu_asynch.h
+    gpu_synch.cpp
+    gpu_synch.h
+    gpu_thread.cpp
+    gpu_thread.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4f6126116..aae2a4019 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) {
     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
     // We do this before actually writing the new data because the destination address might contain
     // a dirty surface that will have to be written back to memory.
-    rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
+    Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32));

     Memory::Write32(*dest_address, data);
     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 0474c7ba3..9dfea5999 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() {
     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
         // copying.
-        rasterizer.FlushRegion(*source_cpu, src_size);
+        Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size);

         // We have to invalidate the destination region to evict any outdated surfaces from the
         // cache. We do this before actually writing the new data because the destination address
         // might contain a dirty surface that will have to be written back to memory.
-        rasterizer.InvalidateRegion(*dest_cpu, dst_size);
+        Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size);
     };

     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index ac30d1a89..08abf8ac9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -12,7 +12,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/gpu.h"
-#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_base.h"

 namespace Tegra {

@@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
     UNREACHABLE();
 }

-GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) {
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+    auto& rasterizer{renderer.Rasterizer()};
     memory_manager = std::make_unique<Tegra::MemoryManager>();
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
     maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 6313702f2..14a421cc1 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -16,8 +16,8 @@ class System;
 }

 namespace VideoCore {
-class RasterizerInterface;
-}
+class RendererBase;
+} // namespace VideoCore

 namespace Tegra {

@@ -119,9 +119,10 @@ enum class EngineID {
     MAXWELL_DMA_COPY_A = 0xB0B5,
 };

-class GPU final {
+class GPU {
 public:
-    explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+
     ~GPU();

     struct MethodCall {
@@ -200,8 +201,42 @@ public:
         };
     } regs{};

+    /// Push GPU command entries to be processed
+    virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
+
+    /// Swap buffers (render frame)
+    virtual void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+
 private:
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreAcquire();
+
+    /// Calls a GPU puller method.
+    void CallPullerMethod(const MethodCall& method_call);
+
+    /// Calls a GPU engine method.
+    void CallEngineMethod(const MethodCall& method_call);
+
+    /// Determines where the method should be executed.
+    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+
+protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    VideoCore::RendererBase& renderer;
+
+private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;

     /// Mapping of command subchannels to their bound engine ids.
@@ -217,18 +252,6 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
-
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessSemaphoreTriggerMethod();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreAcquire();
-
-    // Calls a GPU puller method.
-    void CallPullerMethod(const MethodCall& method_call);
-    // Calls a GPU engine method.
-    void CallEngineMethod(const MethodCall& method_call);
-    // Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
 };

 #define ASSERT_REG_POSITION(field_name, position) \
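With `Tegra::GPU` now an abstract base, the synchronous and asynchronous implementations added below are interchangeable behind this interface. A minimal caller-side sketch of how a backend might be selected at construction time; the `use_asynchronous_gpu_emulation` flag and the `CreateGPU` factory are assumptions for illustration and are not part of this diff:

```cpp
// Hypothetical backend selection; only GPUSynch/GPUAsynch and their
// constructors come from this diff, the rest is assumed for the sketch.
#include <memory>

#include "core/settings.h"
#include "video_core/gpu_asynch.h"
#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"

std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system, VideoCore::RendererBase& renderer) {
    if (Settings::values.use_asynchronous_gpu_emulation) {
        // Commands are queued and executed on a dedicated GPU thread.
        return std::make_unique<VideoCommon::GPUAsynch>(system, renderer);
    }
    // Commands are executed immediately on the calling (CPU) thread.
    return std::make_unique<VideoCommon::GPUSynch>(system, renderer);
}
```

Because callers only see the `Tegra::GPU` interface (`PushGPUEntries`, `SwapBuffers`, the flush/invalidate hooks), the two backends can be swapped without touching the command-submission sites.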
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
new file mode 100644
index 000000000..ad0a747e3
--- /dev/null
+++ b/src/video_core/gpu_asynch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {}
+
+GPUAsynch::~GPUAsynch() = default;
+
+void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    gpu_thread.SubmitList(std::move(entries));
+}
+
+void GPUAsynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    gpu_thread.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushRegion(addr, size);
+}
+
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.InvalidateRegion(addr, size);
+}
+
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    gpu_thread.FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
new file mode 100644
index 000000000..58046f3e9
--- /dev/null
+++ b/src/video_core/gpu_asynch.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+namespace GPUThread {
+class ThreadManager;
+} // namespace GPUThread
+
+/// Implementation of GPU interface that runs the GPU asynchronously
+class GPUAsynch : public Tegra::GPU {
+public:
+    explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUAsynch();
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+
+private:
+    GPUThread::ThreadManager gpu_thread;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
new file mode 100644
index 000000000..4c00b96c7
--- /dev/null
+++ b/src/video_core/gpu_synch.cpp
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/gpu_synch.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon {
+
+GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
+    : Tegra::GPU(system, renderer) {}
+
+GPUSynch::~GPUSynch() = default;
+
+void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
+    dma_pusher->Push(std::move(entries));
+    dma_pusher->DispatchCalls();
+}
+
+void GPUSynch::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    renderer.SwapBuffers(std::move(framebuffer));
+}
+
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushRegion(addr, size);
+}
+
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().InvalidateRegion(addr, size);
+}
+
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
new file mode 100644
index 000000000..658f683e2
--- /dev/null
+++ b/src/video_core/gpu_synch.h
@@ -0,0 +1,29 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/gpu.h"
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon {
+
+/// Implementation of GPU interface that runs the GPU synchronously
+class GPUSynch : public Tegra::GPU {
+public:
+    explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
+    ~GPUSynch();
+
+    void PushGPUEntries(Tegra::CommandList&& entries) override;
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
new file mode 100644
index 000000000..c5bdd2a17
--- /dev/null
+++ b/src/video_core/gpu_thread.cpp
@@ -0,0 +1,152 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/microprofile.h"
+#include "core/frontend/scope_acquire_window_context.h"
+#include "core/settings.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/gpu.h"
+#include "video_core/gpu_thread.h"
+#include "video_core/renderer_base.h"
+
+namespace VideoCommon::GPUThread {
+
+/// Executes a single GPU thread command
+static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher) {
+    if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
+        dma_pusher.Push(std::move(submit_list->entries));
+        dma_pusher.DispatchCalls();
+    } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
+        renderer.SwapBuffers(data->framebuffer);
+    } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
+        renderer.Rasterizer().FlushRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
+    } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
+        renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
+    } else {
+        UNREACHABLE();
+    }
+}
+
+/// Runs the GPU thread
+static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
+                      SynchState& state) {
+
+    MicroProfileOnThreadCreate("GpuThread");
+
+    auto WaitForWakeup = [&]() {
+        std::unique_lock<std::mutex> lock{state.signal_mutex};
+        state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
+    };
+
+    // Wait for first GPU command before acquiring the window context
+    WaitForWakeup();
+
+    // If emulation was stopped during disk shader loading, abort before trying to acquire context
+    if (!state.is_running) {
+        return;
+    }
+
+    Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
+
+    while (state.is_running) {
+        if (!state.is_running) {
+            return;
+        }
+
+        {
+            // Thread has been woken up, so make the previous write queue the next read queue
+            std::lock_guard<std::mutex> lock{state.signal_mutex};
+            std::swap(state.push_queue, state.pop_queue);
+        }
+
+        // Execute all of the GPU commands
+        while (!state.pop_queue->empty()) {
+            ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher);
+            state.pop_queue->pop();
+        }
+
+        state.UpdateIdleState();
+
+        // Signal that the GPU thread has finished processing commands
+        if (state.is_idle) {
+            state.idle_condition.notify_one();
+        }
+
+        // Wait for CPU thread to send more GPU commands
+        WaitForWakeup();
+    }
+}
+
+ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
+    : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
+                                                         std::ref(dma_pusher), std::ref(state)},
+      thread_id{thread.get_id()} {}
+
+ThreadManager::~ThreadManager() {
+    {
+        // Notify GPU thread that a shutdown is pending
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+        state.is_running = false;
+    }
+
+    state.signal_condition.notify_one();
+    thread.join();
+}
+
+void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
+    if (entries.empty()) {
+        return;
+    }
+
+    PushCommand(SubmitListCommand(std::move(entries)), false, false);
+}
+
+void ThreadManager::SwapBuffers(
+    std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
+    PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false);
+}
+
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
+    // Block the CPU when using accurate emulation
+    PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
+}
+
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
+    PushCommand(InvalidateRegionCommand(addr, size), true, true);
+}
+
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+    InvalidateRegion(addr, size);
+}
+
+void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) {
+    {
+        std::lock_guard<std::mutex> lock{state.signal_mutex};
+
+        if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
+            // Execute the command synchronously on the current thread
+            ExecuteCommand(&command_data, renderer, dma_pusher);
+            return;
+        }
+
+        // Push the command to the GPU thread
+        state.UpdateIdleState();
+        state.push_queue->emplace(command_data);
+    }
+
+    // Signal the GPU thread that commands are pending
+    state.signal_condition.notify_one();
+
+    if (wait_for_idle) {
+        // Wait for the GPU to be idle (all commands to be executed)
+        std::unique_lock<std::mutex> lock{state.idle_mutex};
+        state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
+    }
+}
+
+} // namespace VideoCommon::GPUThread
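`ExecuteCommand` above dispatches on a `std::variant` by probing each alternative with `std::get_if`. A self-contained sketch of that same dispatch pattern, reduced to two toy commands so it can be compiled and run on its own (the command names here are illustrative, not the ones used in the file above):

```cpp
// Standalone illustration of std::get_if-based dispatch over a std::variant.
#include <cstdio>
#include <variant>

struct PrintCommand {
    const char* text;
};

struct AddCommand {
    int a;
    int b;
};

using ToyCommand = std::variant<PrintCommand, AddCommand>;

static void ExecuteToyCommand(ToyCommand* command) {
    // std::get_if returns a pointer to the alternative if it is the active one,
    // or nullptr otherwise, so the chain below mirrors ExecuteCommand's shape.
    if (const auto print = std::get_if<PrintCommand>(command)) {
        std::printf("%s\n", print->text);
    } else if (const auto add = std::get_if<AddCommand>(command)) {
        std::printf("%d\n", add->a + add->b);
    }
}

int main() {
    ToyCommand commands[] = {PrintCommand{"hello"}, AddCommand{2, 3}};
    for (auto& command : commands) {
        ExecuteToyCommand(&command); // Prints "hello", then "5"
    }
}
```

`std::visit` with an overload set is a common alternative; the explicit `get_if` chain used above keeps a final `else` branch available for the `UNREACHABLE()` assertion.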
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
new file mode 100644
index 000000000..2ad8214cc
--- /dev/null
+++ b/src/video_core/gpu_thread.h
@@ -0,0 +1,136 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <condition_variable>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <thread>
+#include <variant>
+
+namespace Tegra {
+struct FramebufferConfig;
+class DmaPusher;
+} // namespace Tegra
+
+namespace VideoCore {
+class RendererBase;
+} // namespace VideoCore
+
+namespace VideoCommon::GPUThread {
+
+/// Command to signal to the GPU thread that a command list is ready for processing
+struct SubmitListCommand final {
+    explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
+
+    Tegra::CommandList entries;
+};
+
+/// Command to signal to the GPU thread that a swap buffers is pending
+struct SwapBuffersCommand final {
+    explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
+        : framebuffer{std::move(framebuffer)} {}
+
+    std::optional<const Tegra::FramebufferConfig> framebuffer;
+};
+
+/// Command to signal to the GPU thread to flush a region
+struct FlushRegionCommand final {
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to invalidate a region
+struct InvalidateRegionCommand final {
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+/// Command to signal to the GPU thread to flush and invalidate a region
+struct FlushAndInvalidateRegionCommand final {
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+        : addr{addr}, size{size} {}
+
+    const VAddr addr;
+    const u64 size;
+};
+
+using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Struct used to synchronize the GPU thread
+struct SynchState final {
+    std::atomic<bool> is_running{true};
+    std::atomic<bool> is_idle{true};
+    std::condition_variable signal_condition;
+    std::mutex signal_mutex;
+    std::condition_variable idle_condition;
+    std::mutex idle_mutex;
+
+    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
+    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
+    // empty. This allows for efficient thread-safe access, as it does not require any copies.
+
+    using CommandQueue = std::queue<CommandData>;
+    std::array<CommandQueue, 2> command_queues;
+    CommandQueue* push_queue{&command_queues[0]};
+    CommandQueue* pop_queue{&command_queues[1]};
+
+    void UpdateIdleState() {
+        std::lock_guard<std::mutex> lock{idle_mutex};
+        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    }
+};
+
+/// Class used to manage the GPU thread
+class ThreadManager final {
+public:
+    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    ~ThreadManager();
+
+    /// Push GPU command entries to be processed
+    void SubmitList(Tegra::CommandList&& entries);
+
+    /// Swap buffers (render frame)
+    void SwapBuffers(
+        std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
+    void FlushRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be invalidated
+    void InvalidateRegion(VAddr addr, u64 size);
+
+    /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+
+    /// Waits the caller until the GPU thread is idle, used for synchronization
+    void WaitForIdle();
+
+private:
+    /// Pushes a command to be executed by the GPU thread
+    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
+
+    /// Returns true if this is called by the GPU thread
+    bool IsGpuThread() const {
+        return std::this_thread::get_id() == thread_id;
+    }
+
+private:
+    SynchState state;
+    std::thread thread;
+    std::thread::id thread_id;
+    VideoCore::RendererBase& renderer;
+    Tegra::DmaPusher& dma_pusher;
+};
+
+} // namespace VideoCommon::GPUThread
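`SynchState` relies on a two-queue scheme: the CPU thread appends to `push_queue` under `signal_mutex`, and the GPU thread swaps the two pointers when it wakes so it can drain `pop_queue` without holding the lock. A minimal self-contained sketch of that swap pattern, simplified to a plain `int` payload with no idle tracking (names and structure here are an assumption-free toy, not the classes above):

```cpp
// Minimal producer/consumer built on the same "swap two queues" idea as SynchState.
#include <array>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

struct TwoQueueState {
    std::mutex mutex;
    std::condition_variable cv;
    bool running = true;
    std::array<std::queue<int>, 2> queues;
    std::queue<int>* push_queue = &queues[0];
    std::queue<int>* pop_queue = &queues[1];
};

static void Consumer(TwoQueueState& state) {
    while (true) {
        {
            std::unique_lock lock{state.mutex};
            state.cv.wait(lock, [&] { return !state.push_queue->empty() || !state.running; });
            if (!state.running && state.push_queue->empty()) {
                return;
            }
            // Swap so the producer can keep writing while we drain without the lock held.
            std::swap(state.push_queue, state.pop_queue);
        }
        while (!state.pop_queue->empty()) {
            std::printf("consumed %d\n", state.pop_queue->front());
            state.pop_queue->pop();
        }
    }
}

int main() {
    TwoQueueState state;
    std::thread consumer{Consumer, std::ref(state)};
    for (int i = 0; i < 8; ++i) {
        {
            std::lock_guard lock{state.mutex};
            state.push_queue->push(i);
        }
        state.cv.notify_one();
    }
    {
        std::lock_guard lock{state.mutex};
        state.running = false;
    }
    state.cv.notify_one();
    consumer.join();
}
```

Only the pointer swap happens under the mutex, which keeps the producer's critical section short; the trade-off is that commands already moved to `pop_queue` are invisible to the producer until the next swap.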
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 321d9dd3d..168288088 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {}

 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-
-    if (Settings::values.use_accurate_gpu_emulation) {
-        // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
-        res_cache.FlushRegion(addr, size);
-    }
+    res_cache.FlushRegion(addr, size);
 }

 void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {