Diffstat (limited to 'src/video_core')
-rw-r--r--   src/video_core/engines/maxwell_3d.cpp |  5
-rw-r--r--   src/video_core/gpu.cpp                | 48
-rw-r--r--   src/video_core/gpu.h                  | 34
-rw-r--r--   src/video_core/gpu_asynch.cpp         |  9
-rw-r--r--   src/video_core/gpu_asynch.h           |  3
-rw-r--r--   src/video_core/gpu_synch.cpp          |  2
-rw-r--r--   src/video_core/gpu_synch.h            |  4
-rw-r--r--   src/video_core/gpu_thread.cpp         | 27
-rw-r--r--   src/video_core/gpu_thread.h           | 32
9 files changed, 105 insertions(+), 59 deletions(-)
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 74c46ec04..125c53360 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -525,8 +525,9 @@ void Maxwell3D::ProcessSyncPoint() {
     const u32 sync_point = regs.sync_info.sync_point.Value();
     const u32 increment = regs.sync_info.increment.Value();
     const u32 cache_flush = regs.sync_info.unknown.Value();
-    LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
-              cache_flush);
+    if (increment) {
+        system.GPU().IncrementSyncPoint(sync_point);
+    }
 }
 
 void Maxwell3D::DrawArrays() {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 21007d8b2..1622332a4 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -29,7 +29,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
     UNREACHABLE();
 }
 
-GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
+GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
+    : system{system}, renderer{renderer}, is_async{is_async} {
     auto& rasterizer{renderer.Rasterizer()};
     memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
     dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
@@ -74,6 +75,51 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
+void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
+    syncpoints[syncpoint_id]++;
+    std::lock_guard lock{sync_mutex};
+    if (!syncpt_interrupts[syncpoint_id].empty()) {
+        u32 value = syncpoints[syncpoint_id].load();
+        auto it = syncpt_interrupts[syncpoint_id].begin();
+        while (it != syncpt_interrupts[syncpoint_id].end()) {
+            if (value >= *it) {
+                TriggerCpuInterrupt(syncpoint_id, *it);
+                it = syncpt_interrupts[syncpoint_id].erase(it);
+                continue;
+            }
+            it++;
+        }
+    }
+}
+
+u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
+    return syncpoints[syncpoint_id].load();
+}
+
+void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
+    auto& interrupt = syncpt_interrupts[syncpoint_id];
+    bool contains = std::any_of(interrupt.begin(), interrupt.end(),
+                                [value](u32 in_value) { return in_value == value; });
+    if (contains) {
+        return;
+    }
+    syncpt_interrupts[syncpoint_id].emplace_back(value);
+}
+
+bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
+    std::lock_guard lock{sync_mutex};
+    auto& interrupt = syncpt_interrupts[syncpoint_id];
+    const auto iter =
+        std::find_if(interrupt.begin(), interrupt.end(),
+                     [value](u32 interrupt_value) { return value == interrupt_value; });
+
+    if (iter == interrupt.end()) {
+        return false;
+    }
+    interrupt.erase(iter);
+    return true;
+}
+
 u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     ASSERT(format != RenderTargetFormat::NONE);
 
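The four functions added above are the core of the new syncpoint model: one atomic counter per syncpoint, plus a per-syncpoint list of interrupt thresholds guarded by sync_mutex. A minimal self-contained sketch of that bookkeeping, for orientation (the fixed syncpoint count here is an assumption; the real value is Service::Nvidia::MaxSyncPoints from nvdata.h):

#include <array>
#include <atomic>
#include <cstdint>
#include <list>
#include <mutex>

// Sketch only: models the syncpoint bookkeeping added to Tegra::GPU above.
class SyncpointTable {
    static constexpr std::size_t MaxSyncPoints = 192; // assumed count; see nvdata.h

public:
    // Bump the counter, then fire any registered threshold it now satisfies
    // (mirrors GPU::IncrementSyncPoint).
    void Increment(std::uint32_t id) {
        const std::uint32_t value = ++counters[id];
        std::lock_guard lock{mutex};
        auto& thresholds = waiters[id];
        for (auto it = thresholds.begin(); it != thresholds.end();) {
            if (value >= *it) {
                OnInterrupt(id, *it); // stands in for TriggerCpuInterrupt
                it = thresholds.erase(it);
            } else {
                ++it;
            }
        }
    }

    // Mirrors GPU::RegisterSyncptInterrupt: record each threshold once.
    // Locking is deliberately left to the caller, matching the diff.
    void Register(std::uint32_t id, std::uint32_t value) {
        auto& thresholds = waiters[id];
        for (const auto threshold : thresholds) {
            if (threshold == value) {
                return; // already registered
            }
        }
        thresholds.push_back(value);
    }

private:
    void OnInterrupt(std::uint32_t /*id*/, std::uint32_t /*value*/) {}

    std::array<std::atomic<std::uint32_t>, MaxSyncPoints> counters{};
    std::array<std::list<std::uint32_t>, MaxSyncPoints> waiters;
    std::mutex mutex;
};

Note that in the diff RegisterSyncptInterrupt does not take sync_mutex itself; callers are expected to already hold it through GPU::LockSync() (declared in gpu.h below), which is why the sketch's Register leaves locking to the caller.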
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 0055e5326..87c96f46b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,8 +5,12 @@
 #pragma once
 
 #include <array>
+#include <atomic>
+#include <list>
 #include <memory>
+#include <mutex>
 #include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
 
@@ -127,7 +131,7 @@ class MemoryManager;
 
 class GPU {
 public:
-    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer);
+    explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
 
     virtual ~GPU();
 
@@ -170,6 +174,22 @@ public:
     /// Returns a reference to the GPU DMA pusher.
     Tegra::DmaPusher& DmaPusher();
 
+    void IncrementSyncPoint(u32 syncpoint_id);
+
+    u32 GetSyncpointValue(u32 syncpoint_id) const;
+
+    void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
+
+    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+
+    std::unique_lock<std::mutex> LockSync() {
+        return std::unique_lock{sync_mutex};
+    }
+
+    bool IsAsync() const {
+        return is_async;
+    }
+
     /// Returns a const reference to the GPU DMA pusher.
     const Tegra::DmaPusher& DmaPusher() const;
 
@@ -239,6 +259,9 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
+protected:
+    virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
+
 private:
     void ProcessBindMethod(const MethodCall& method_call);
     void ProcessSemaphoreTriggerMethod();
@@ -257,6 +280,7 @@ private:
 protected:
     std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     VideoCore::RendererBase& renderer;
+    Core::System& system;
 
 private:
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
@@ -273,6 +297,14 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
+
+    std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
+
+    std::mutex sync_mutex;
+
+    const bool is_async;
 };
 
 #define ASSERT_REG_POSITION(field_name, position) \
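Taken together, the new public surface (IncrementSyncPoint, GetSyncpointValue, RegisterSyncptInterrupt, CancelSyncptInterrupt, LockSync, IsAsync) is aimed at the nvdriver services. A hedged sketch of how a wait could be built on it — TryStartSyncpointWait and the blocking step are invented for illustration and are not part of this change:

// Illustration only; assumes yuzu's video_core/gpu.h is available.
bool TryStartSyncpointWait(Tegra::GPU& gpu, u32 syncpoint_id, u32 threshold) {
    // Fast path: the syncpoint may already have passed the threshold.
    if (gpu.GetSyncpointValue(syncpoint_id) >= threshold) {
        return true;
    }
    // LockSync() holds the same mutex IncrementSyncPoint's scan takes, so an
    // increment cannot slip between the re-check and the registration.
    const auto lock = gpu.LockSync();
    if (gpu.GetSyncpointValue(syncpoint_id) >= threshold) {
        return true;
    }
    gpu.RegisterSyncptInterrupt(syncpoint_id, threshold);
    // The caller now sleeps until TriggerCpuInterrupt fires for this
    // (syncpoint_id, threshold) pair, or gives up and calls
    // gpu.CancelSyncptInterrupt(syncpoint_id, threshold).
    return false;
}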
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index d4e2553a9..ea67be831 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "core/core.h"
+#include "core/hardware_interrupt_manager.h"
 #include "video_core/gpu_asynch.h"
 #include "video_core/gpu_thread.h"
 #include "video_core/renderer_base.h"
@@ -9,7 +11,7 @@
 namespace VideoCommon {
 
 GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : GPU(system, renderer), gpu_thread{system} {}
+    : GPU(system, renderer, true), gpu_thread{system} {}
 
 GPUAsynch::~GPUAsynch() = default;
 
@@ -38,4 +40,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
+void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
+    auto& interrupt_manager = system.InterruptManager();
+    interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
+}
+
 } // namespace VideoCommon
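The asynchronous backend runs DMA on the GPU thread, so it cannot signal the guest directly; it hands the (syncpoint, value) pair to the hardware interrupt manager, which delivers it on the CPU side. A rough sketch of that hand-off pattern — the mailbox type and names are invented here, and yuzu's InterruptManager internals are not part of this diff:

#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>
#include <utility>

// Illustration: GPU thread posts, CPU side drains (e.g. from a timed event).
class CpuInterruptMailbox {
public:
    using Handler = std::function<void(std::uint32_t, std::uint32_t)>;

    // GPU-thread side, cf. GPUAsynch::TriggerCpuInterrupt above.
    void Post(std::uint32_t syncpoint_id, std::uint32_t value) {
        std::lock_guard lock{mutex};
        pending.emplace(syncpoint_id, value);
    }

    // CPU side: deliver everything queued so far.
    void Drain(const Handler& handler) {
        std::queue<std::pair<std::uint32_t, std::uint32_t>> batch;
        {
            std::lock_guard lock{mutex};
            std::swap(batch, pending);
        }
        while (!batch.empty()) {
            const auto [id, value] = batch.front();
            batch.pop();
            handler(id, value);
        }
    }

private:
    std::mutex mutex;
    std::queue<std::pair<std::uint32_t, std::uint32_t>> pending;
};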
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 30be74cba..36377d677 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,6 +27,9 @@ public:
     void InvalidateRegion(CacheAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
 
+protected:
+    void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
+
 private:
     GPUThread::ThreadManager gpu_thread;
 };
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 45e43b1dc..d4ead9c47 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,7 +8,7 @@
 namespace VideoCommon {
 
 GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
-    : GPU(system, renderer) {}
+    : GPU(system, renderer, false) {}
 
 GPUSynch::~GPUSynch() = default;
 
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 3031fcf72..07bcc47f1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -25,6 +25,10 @@ public:
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+
+protected:
+    void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
+                             [[maybe_unused]] u32 value) const override {}
 };
 
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3f0939ec9..b441e92b0 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
     MicroProfileOnThreadCreate("GpuThread");
 
     // Wait for first GPU command before acquiring the window context
-    state.WaitForCommands();
+    while (state.queue.Empty())
+        ;
 
     // If emulation was stopped during disk shader loading, abort before trying to acquire context
     if (!state.is_running) {
@@ -32,7 +33,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
 
     CommandDataContainer next;
     while (state.is_running) {
-        state.WaitForCommands();
         while (!state.queue.Empty()) {
             state.queue.Pop(next);
             if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
@@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
             } else {
                 UNREACHABLE();
             }
-            state.signaled_fence = next.fence;
-            state.TrySynchronize();
+            state.signaled_fence.store(next.fence);
         }
     }
 }
@@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
 }
 
 void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
-    if (state.queue.Empty()) {
-        // It's quicker to invalidate a single region on the CPU if the queue is already empty
-        system.Renderer().Rasterizer().InvalidateRegion(addr, size);
-    } else {
-        PushCommand(InvalidateRegionCommand(addr, size));
-    }
+    system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }
 
 void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
 u64 ThreadManager::PushCommand(CommandData&& command_data) {
     const u64 fence{++state.last_fence};
     state.queue.Push(CommandDataContainer(std::move(command_data), fence));
-    state.SignalCommands();
     return fence;
 }
 
 MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
 void SynchState::WaitForSynchronization(u64 fence) {
-    if (signaled_fence >= fence) {
-        return;
-    }
-
-    // Wait for the GPU to be idle (all commands to be executed)
-    {
-        MICROPROFILE_SCOPE(GPU_wait);
-        std::unique_lock lock{synchronization_mutex};
-        synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
-    }
+    while (signaled_fence.load() < fence)
+        ;
 }
 
 } // namespace VideoCommon::GPUThread
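With the condition variables gone, CPU/GPU synchronization reduces to spinning on an atomic fence counter: the GPU thread publishes the fence of the last executed command, and WaitForSynchronization busy-waits until it catches up. The pattern in isolation (the yield is an embellishment for the sketch; the diff spins bare):

#include <atomic>
#include <cstdint>
#include <thread>

std::atomic<std::uint64_t> signaled_fence{0};

// GPU thread: publish that the command tagged 'fence' has executed.
void SignalFence(std::uint64_t fence) {
    signaled_fence.store(fence, std::memory_order_release);
}

// CPU thread: block (by spinning) until the GPU catches up to 'fence'.
void WaitForFence(std::uint64_t fence) {
    while (signaled_fence.load(std::memory_order_acquire) < fence) {
        std::this_thread::yield(); // the diff spins without yielding
    }
}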
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a72..1d9d0c39e 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -88,41 +88,9 @@ struct CommandDataContainer {
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
     std::atomic_bool is_running{true};
-    std::atomic_int queued_frame_count{};
-    std::mutex synchronization_mutex;
-    std::mutex commands_mutex;
-    std::condition_variable commands_condition;
-    std::condition_variable synchronization_condition;
-
-    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
-    /// synchronized. This is entirely empirical.
-    bool IsSynchronized() const {
-        constexpr std::size_t max_queue_gap{5};
-        return queue.Size() <= max_queue_gap;
-    }
-
-    void TrySynchronize() {
-        if (IsSynchronized()) {
-            std::lock_guard lock{synchronization_mutex};
-            synchronization_condition.notify_one();
-        }
-    }
 
     void WaitForSynchronization(u64 fence);
 
-    void SignalCommands() {
-        if (queue.Empty()) {
-            return;
-        }
-
-        commands_condition.notify_one();
-    }
-
-    void WaitForCommands() {
-        std::unique_lock lock{commands_mutex};
-        commands_condition.wait(lock, [this] { return !queue.Empty(); });
-    }
-
     using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
     CommandQueue queue;
     u64 last_fence{};
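For reference, roughly what SynchState reduces to after this patch, reconstructed from the hunks above (signaled_fence sits below the visible context; its atomic type is implied by the .load()/.store() calls in gpu_thread.cpp):

/// Struct used to synchronize the GPU thread
struct SynchState final {
    std::atomic_bool is_running{true};

    void WaitForSynchronization(u64 fence); // spins; see gpu_thread.cpp above

    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
    CommandQueue queue;
    u64 last_fence{};
    std::atomic<u64> signaled_fence{};
};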