summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/gpu.cpp22
-rw-r--r--src/video_core/gpu.h21
-rw-r--r--src/video_core/gpu_thread.cpp17
-rw-r--r--src/video_core/gpu_thread.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp4
6 files changed, 70 insertions, 8 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 19d3bd305..85a6c7bb5 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
125 return true; 125 return true;
126} 126}
127 127
128u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
129 std::unique_lock lck{flush_request_mutex};
130 const u64 fence = ++last_flush_fence;
131 flush_requests.emplace_back(fence, addr, size);
132 return fence;
133}
134
135void GPU::TickWork() {
136 std::unique_lock lck{flush_request_mutex};
137 while (!flush_requests.empty()) {
138 auto& request = flush_requests.front();
139 const u64 fence = request.fence;
140 const CacheAddr addr = request.addr;
141 const std::size_t size = request.size;
142 flush_requests.pop_front();
143 flush_request_mutex.unlock();
144 renderer->Rasterizer().FlushRegion(addr, size);
145 current_flush_fence.store(fence);
146 flush_request_mutex.lock();
147 }
148}
149
128u64 GPU::GetTicks() const { 150u64 GPU::GetTicks() const {
129 // This values were reversed engineered by fincs from NVN 151 // This values were reversed engineered by fincs from NVN
130 // The gpu clock is reported in units of 385/625 nanoseconds 152 // The gpu clock is reported in units of 385/625 nanoseconds
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fa9991c87..943a5b110 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -159,6 +159,14 @@ public:
159 void SyncGuestHost(); 159 void SyncGuestHost();
160 virtual void OnCommandListEnd(); 160 virtual void OnCommandListEnd();
161 161
162 u64 RequestFlush(CacheAddr addr, std::size_t size);
163
164 u64 CurrentFlushRequestFence() const {
165 return current_flush_fence.load(std::memory_order_relaxed);
166 }
167
168 void TickWork();
169
162 /// Returns a reference to the Maxwell3D GPU engine. 170 /// Returns a reference to the Maxwell3D GPU engine.
163 Engines::Maxwell3D& Maxwell3D(); 171 Engines::Maxwell3D& Maxwell3D();
164 172
@@ -327,6 +335,19 @@ private:
327 335
328 std::condition_variable sync_cv; 336 std::condition_variable sync_cv;
329 337
338 struct FlushRequest {
339 FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
340 : fence{fence}, addr{addr}, size{size} {}
341 u64 fence;
342 CacheAddr addr;
343 std::size_t size;
344 };
345
346 std::list<FlushRequest> flush_requests;
347 std::atomic<u64> current_flush_fence{};
348 u64 last_flush_fence{};
349 std::mutex flush_request_mutex;
350
330 const bool is_async; 351 const bool is_async;
331}; 352};
332 353
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3e2be00e9..9460364a3 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -15,8 +15,9 @@
15namespace VideoCommon::GPUThread { 15namespace VideoCommon::GPUThread {
16 16
17/// Runs the GPU thread 17/// Runs the GPU thread
18static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, 18static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
19 Tegra::DmaPusher& dma_pusher, SynchState& state) { 19 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
20 SynchState& state) {
20 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
21 22
22 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
@@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
40 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 41 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
41 } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { 42 } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
42 renderer.Rasterizer().ReleaseFences(); 43 renderer.Rasterizer().ReleaseFences();
44 } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
45 system.GPU().TickWork();
43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 46 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
44 renderer.Rasterizer().FlushRegion(data->addr, data->size); 47 renderer.Rasterizer().FlushRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 48 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() {
68void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 71void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
69 Core::Frontend::GraphicsContext& context, 72 Core::Frontend::GraphicsContext& context,
70 Tegra::DmaPusher& dma_pusher) { 73 Tegra::DmaPusher& dma_pusher) {
71 thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), 74 thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
72 std::ref(state)}; 75 std::ref(context), std::ref(dma_pusher), std::ref(state)};
73} 76}
74 77
75void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 78void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
85 return; 88 return;
86 } 89 }
87 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { 90 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
88 u64 fence = PushCommand(FlushRegionCommand(addr, size)); 91 auto& gpu = system.GPU();
89 while (fence > state.signaled_fence.load(std::memory_order_relaxed)) { 92 u64 fence = gpu.RequestFlush(addr, size);
93 PushCommand(GPUTickCommand());
94 while (fence > gpu.CurrentFlushRequestFence()) {
90 } 95 }
91 } 96 }
92} 97}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 9d0877921..5a28335d6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
70 u64 size; 70 u64 size;
71}; 71};
72 72
73/// Command to signal to the GPU thread that processing has ended 73/// Command called within the gpu, to schedule actions after a command list end
74struct OnCommandListEndCommand final {}; 74struct OnCommandListEndCommand final {};
75 75
76/// Command to make the gpu look into pending requests
77struct GPUTickCommand final {};
78
76using CommandData = 79using CommandData =
77 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, 80 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
78 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>; 81 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
82 GPUTickCommand>;
79 83
80struct CommandDataContainer { 84struct CommandDataContainer {
81 CommandDataContainer() = default; 85 CommandDataContainer() = default;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e52e5961f..fbd81b895 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
601 EndTransformFeedback(); 601 EndTransformFeedback();
602 602
603 ++num_queued_commands; 603 ++num_queued_commands;
604
605 system.GPU().TickWork();
604} 606}
605 607
606void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 608void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
628 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 630 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
629 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 631 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
630 ++num_queued_commands; 632 ++num_queued_commands;
633 system.GPU().TickWork();
631} 634}
632 635
633void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 636void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
652} 655}
653 656
654bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { 657bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
658 if (!Settings::IsGPULevelExtreme()) {
659 return buffer_cache.MustFlushRegion(addr, size);
660 }
655 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); 661 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
656} 662}
657 663
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 507262c8f..926ecf38e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
365 }); 365 });
366 366
367 EndTransformFeedback(); 367 EndTransformFeedback();
368
369 system.GPU().TickWork();
368} 370}
369 371
370void RasterizerVulkan::Clear() { 372void RasterizerVulkan::Clear() {
@@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
492 descriptor_set, {}); 494 descriptor_set, {});
493 cmdbuf.Dispatch(grid_x, grid_y, grid_z); 495 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
494 }); 496 });
497
498 system.GPU().TickWork();
495} 499}
496 500
497void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { 501void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {