summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp4
-rw-r--r--src/video_core/gpu_asynch.h1
-rw-r--r--src/video_core/gpu_synch.h1
-rw-r--r--src/video_core/gpu_thread.cpp19
-rw-r--r--src/video_core/gpu_thread.h9
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp5
10 files changed, 38 insertions, 24 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index fbb9105d6..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/core_timing.h" 8#include "core/core_timing.h"
8#include "core/memory.h" 9#include "core/memory.h"
@@ -17,6 +18,8 @@
17 18
18namespace Tegra { 19namespace Tegra {
19 20
21MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
22
20GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) 23GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
21 : system{system}, renderer{renderer}, is_async{is_async} { 24 : system{system}, renderer{renderer}, is_async{is_async} {
22 auto& rasterizer{renderer.Rasterizer()}; 25 auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
63 return *dma_pusher; 66 return *dma_pusher;
64} 67}
65 68
69void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
70 // Synced GPU, is always in sync
71 if (!is_async) {
72 return;
73 }
74 MICROPROFILE_SCOPE(GPU_wait);
75 while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
76 }
77}
78
66void GPU::IncrementSyncPoint(const u32 syncpoint_id) { 79void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
67 syncpoints[syncpoint_id]++; 80 syncpoints[syncpoint_id]++;
68 std::lock_guard lock{sync_mutex}; 81 std::lock_guard lock{sync_mutex};
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..dbca19f35 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
177 /// Returns a reference to the GPU DMA pusher. 177 /// Returns a reference to the GPU DMA pusher.
178 Tegra::DmaPusher& DmaPusher(); 178 Tegra::DmaPusher& DmaPusher();
179 179
180 /// Waits for the GPU to finish working.
181 virtual void WaitIdle() const = 0;
182
183 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
184 void WaitFence(u32 syncpoint_id, u32 value) const;
185
180 void IncrementSyncPoint(u32 syncpoint_id); 186 void IncrementSyncPoint(u32 syncpoint_id);
181 187
182 u32 GetSyncpointValue(u32 syncpoint_id) const; 188 u32 GetSyncpointValue(u32 syncpoint_id) const;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); 44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
45} 45}
46 46
47void GPUAsynch::WaitIdle() const {
48 gpu_thread.WaitIdle();
49}
50
47} // namespace VideoCommon 51} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
25 void FlushRegion(CacheAddr addr, u64 size) override; 25 void FlushRegion(CacheAddr addr, u64 size) override;
26 void InvalidateRegion(CacheAddr addr, u64 size) override; 26 void InvalidateRegion(CacheAddr addr, u64 size) override;
27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
28 void WaitIdle() const override;
28 29
29protected: 30protected:
30 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; 31 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
24 void FlushRegion(CacheAddr addr, u64 size) override; 24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override; 25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27 void WaitIdle() const override {}
27 28
28protected: 29protected:
29 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, 30 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
10#include "core/frontend/scope_acquire_window_context.h" 8#include "core/frontend/scope_acquire_window_context.h"
11#include "video_core/dma_pusher.h" 9#include "video_core/dma_pusher.h"
12#include "video_core/gpu.h" 10#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
68 66
69void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { 67void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
70 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; 68 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
71 synchronization_event = system.CoreTiming().RegisterEvent(
72 "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
73} 69}
74 70
75void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 71void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
76 const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; 72 PushCommand(SubmitListCommand(std::move(entries)));
77 const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
78 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
79} 73}
80 74
81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 75void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
96 InvalidateRegion(addr, size); 90 InvalidateRegion(addr, size);
97} 91}
98 92
93void ThreadManager::WaitIdle() const {
94 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
95 }
96}
97
99u64 ThreadManager::PushCommand(CommandData&& command_data) { 98u64 ThreadManager::PushCommand(CommandData&& command_data) {
100 const u64 fence{++state.last_fence}; 99 const u64 fence{++state.last_fence};
101 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 100 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
102 return fence; 101 return fence;
103} 102}
104 103
105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
106void SynchState::WaitForSynchronization(u64 fence) {
107 while (signaled_fence.load() < fence)
108 ;
109}
110
111} // namespace VideoCommon::GPUThread 104} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
21 21
22namespace Core { 22namespace Core {
23class System; 23class System;
24namespace Timing {
25struct EventType;
26} // namespace Timing
27} // namespace Core 24} // namespace Core
28 25
29namespace VideoCommon::GPUThread { 26namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
89struct SynchState final { 86struct SynchState final {
90 std::atomic_bool is_running{true}; 87 std::atomic_bool is_running{true};
91 88
92 void WaitForSynchronization(u64 fence);
93
94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 89 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
95 CommandQueue queue; 90 CommandQueue queue;
96 u64 last_fence{}; 91 u64 last_fence{};
@@ -121,6 +116,9 @@ public:
121 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 116 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
122 void FlushAndInvalidateRegion(CacheAddr addr, u64 size); 117 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
123 118
119 /// Wait until the GPU thread is idle.
120 void WaitIdle() const;
121
124private: 122private:
125 /// Pushes a command to be executed by the GPU thread 123 /// Pushes a command to be executed by the GPU thread
126 u64 PushCommand(CommandData&& command_data); 124 u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
128private: 126private:
129 SynchState state; 127 SynchState state;
130 Core::System& system; 128 Core::System& system;
131 Core::Timing::EventType* synchronization_event{};
132 std::thread thread; 129 std::thread thread;
133 std::thread::id thread_id; 130 std::thread::id thread_id;
134}; 131};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a85f730a8..cbcf81414 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
348} 348}
349 349
350void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { 350void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
351 std::lock_guard lock{pages_mutex};
351 const u64 page_start{addr >> Memory::PAGE_BITS}; 352 const u64 page_start{addr >> Memory::PAGE_BITS};
352 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 353 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
353 354
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..c24a02d71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <map> 10#include <map>
11#include <memory> 11#include <memory>
12#include <mutex>
12#include <optional> 13#include <optional>
13#include <tuple> 14#include <tuple>
14#include <utility> 15#include <utility>
@@ -230,6 +231,8 @@ private:
230 231
231 using CachedPageMap = boost::icl::interval_map<u64, int>; 232 using CachedPageMap = boost::icl::interval_map<u64, int>;
232 CachedPageMap cached_pages; 233 CachedPageMap cached_pages;
234
235 std::mutex pages_mutex;
233}; 236};
234 237
235} // namespace OpenGL 238} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..4bbd17b12 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
105 system.GetPerfStats().EndSystemFrame();
106
107 // Maintain the rasterizer's state as a priority 105 // Maintain the rasterizer's state as a priority
108 OpenGLState prev_state = OpenGLState::GetCurState(); 106 OpenGLState prev_state = OpenGLState::GetCurState();
109 state.AllDirty(); 107 state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
135 133
136 render_window.PollEvents(); 134 render_window.PollEvents();
137 135
138 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
139 system.GetPerfStats().BeginSystemFrame();
140
141 // Restore the rasterizer state 136 // Restore the rasterizer state
142 prev_state.AllDirty(); 137 prev_state.AllDirty();
143 prev_state.Apply(); 138 prev_state.Apply();