diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/gpu.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.h | 1 | ||||
| -rw-r--r-- | src/video_core/gpu_synch.h | 1 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 5 |
10 files changed, 38 insertions, 24 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index fbb9105d6..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| @@ -17,6 +18,8 @@ | |||
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | 20 | ||
| 21 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 22 | |||
| 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 23 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | : system{system}, renderer{renderer}, is_async{is_async} { | 24 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | auto& rasterizer{renderer.Rasterizer()}; | 25 | auto& rasterizer{renderer.Rasterizer()}; |
| @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 63 | return *dma_pusher; | 66 | return *dma_pusher; |
| 64 | } | 67 | } |
| 65 | 68 | ||
| 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | ||
| 70 | // Synced GPU, is always in sync | ||
| 71 | if (!is_async) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 75 | while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 67 | syncpoints[syncpoint_id]++; | 80 | syncpoints[syncpoint_id]++; |
| 68 | std::lock_guard lock{sync_mutex}; | 81 | std::lock_guard lock{sync_mutex}; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -177,6 +177,12 @@ public: | |||
| 177 | /// Returns a reference to the GPU DMA pusher. | 177 | /// Returns a reference to the GPU DMA pusher. |
| 178 | Tegra::DmaPusher& DmaPusher(); | 178 | Tegra::DmaPusher& DmaPusher(); |
| 179 | 179 | ||
| 180 | // Waits for the GPU to finish working | ||
| 181 | virtual void WaitIdle() const = 0; | ||
| 182 | |||
| 183 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||
| 184 | void WaitFence(u32 syncpoint_id, u32 value) const; | ||
| 185 | |||
| 180 | void IncrementSyncPoint(u32 syncpoint_id); | 186 | void IncrementSyncPoint(u32 syncpoint_id); |
| 181 | 187 | ||
| 182 | u32 GetSyncpointValue(u32 syncpoint_id) const; | 188 | u32 GetSyncpointValue(u32 syncpoint_id) const; |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con | |||
| 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void GPUAsynch::WaitIdle() const { | ||
| 48 | gpu_thread.WaitIdle(); | ||
| 49 | } | ||
| 50 | |||
| 47 | } // namespace VideoCommon | 51 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -25,6 +25,7 @@ public: | |||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void WaitIdle() const override; | ||
| 28 | 29 | ||
| 29 | protected: | 30 | protected: |
| 30 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 31 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -24,6 +24,7 @@ public: | |||
| 24 | void FlushRegion(CacheAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void WaitIdle() const override {} | ||
| 27 | 28 | ||
| 28 | protected: | 29 | protected: |
| 29 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | 30 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/core_timing_util.h" | ||
| 10 | #include "core/frontend/scope_acquire_window_context.h" | 8 | #include "core/frontend/scope_acquire_window_context.h" |
| 11 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { | |||
| 68 | 66 | ||
| 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | 67 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { |
| 70 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; |
| 71 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 72 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 73 | } | 69 | } |
| 74 | 70 | ||
| 75 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 71 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 76 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | 72 | PushCommand(SubmitListCommand(std::move(entries))); |
| 77 | const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; | ||
| 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | ||
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 75 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 96 | InvalidateRegion(addr, size); | 90 | InvalidateRegion(addr, size); |
| 97 | } | 91 | } |
| 98 | 92 | ||
| 93 | void ThreadManager::WaitIdle() const { | ||
| 94 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 100 | const u64 fence{++state.last_fence}; | 99 | const u64 fence{++state.last_fence}; |
| 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 102 | return fence; | 101 | return fence; |
| 103 | } | 102 | } |
| 104 | 103 | ||
| 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 106 | void SynchState::WaitForSynchronization(u64 fence) { | ||
| 107 | while (signaled_fence.load() < fence) | ||
| 108 | ; | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::GPUThread | 104 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -21,9 +21,6 @@ class DmaPusher; | |||
| 21 | 21 | ||
| 22 | namespace Core { | 22 | namespace Core { |
| 23 | class System; | 23 | class System; |
| 24 | namespace Timing { | ||
| 25 | struct EventType; | ||
| 26 | } // namespace Timing | ||
| 27 | } // namespace Core | 24 | } // namespace Core |
| 28 | 25 | ||
| 29 | namespace VideoCommon::GPUThread { | 26 | namespace VideoCommon::GPUThread { |
| @@ -89,8 +86,6 @@ struct CommandDataContainer { | |||
| 89 | struct SynchState final { | 86 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 87 | std::atomic_bool is_running{true}; |
| 91 | 88 | ||
| 92 | void WaitForSynchronization(u64 fence); | ||
| 93 | |||
| 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 89 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 95 | CommandQueue queue; | 90 | CommandQueue queue; |
| 96 | u64 last_fence{}; | 91 | u64 last_fence{}; |
| @@ -121,6 +116,9 @@ public: | |||
| 121 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 116 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 122 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | 117 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); |
| 123 | 118 | ||
| 119 | // Wait until the gpu thread is idle. | ||
| 120 | void WaitIdle() const; | ||
| 121 | |||
| 124 | private: | 122 | private: |
| 125 | /// Pushes a command to be executed by the GPU thread | 123 | /// Pushes a command to be executed by the GPU thread |
| 126 | u64 PushCommand(CommandData&& command_data); | 124 | u64 PushCommand(CommandData&& command_data); |
| @@ -128,7 +126,6 @@ private: | |||
| 128 | private: | 126 | private: |
| 129 | SynchState state; | 127 | SynchState state; |
| 130 | Core::System& system; | 128 | Core::System& system; |
| 131 | Core::Timing::EventType* synchronization_event{}; | ||
| 132 | std::thread thread; | 129 | std::thread thread; |
| 133 | std::thread::id thread_id; | 130 | std::thread::id thread_id; |
| 134 | }; | 131 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..cbcf81414 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | |||
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 351 | std::lock_guard lock{pages_mutex}; | ||
| 351 | const u64 page_start{addr >> Memory::PAGE_BITS}; | 352 | const u64 page_start{addr >> Memory::PAGE_BITS}; |
| 352 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | 353 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |
| 353 | 354 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <map> | 10 | #include <map> |
| 11 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 12 | #include <optional> | 13 | #include <optional> |
| 13 | #include <tuple> | 14 | #include <tuple> |
| 14 | #include <utility> | 15 | #include <utility> |
| @@ -230,6 +231,8 @@ private: | |||
| 230 | 231 | ||
| 231 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 232 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 232 | CachedPageMap cached_pages; | 233 | CachedPageMap cached_pages; |
| 234 | |||
| 235 | std::mutex pages_mutex; | ||
| 233 | }; | 236 | }; |
| 234 | 237 | ||
| 235 | } // namespace OpenGL | 238 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 105 | system.GetPerfStats().EndSystemFrame(); | ||
| 106 | |||
| 107 | // Maintain the rasterizer's state as a priority | 105 | // Maintain the rasterizer's state as a priority |
| 108 | OpenGLState prev_state = OpenGLState::GetCurState(); | 106 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 109 | state.AllDirty(); | 107 | state.AllDirty(); |
| @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 135 | 133 | ||
| 136 | render_window.PollEvents(); | 134 | render_window.PollEvents(); |
| 137 | 135 | ||
| 138 | system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||
| 139 | system.GetPerfStats().BeginSystemFrame(); | ||
| 140 | |||
| 141 | // Restore the rasterizer state | 136 | // Restore the rasterizer state |
| 142 | prev_state.AllDirty(); | 137 | prev_state.AllDirty(); |
| 143 | prev_state.Apply(); | 138 | prev_state.Apply(); |