summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h 4
-rw-r--r-- src/video_core/gpu.cpp 2
-rw-r--r-- src/video_core/gpu.h 2
-rw-r--r-- src/video_core/gpu_asynch.cpp 4
-rw-r--r-- src/video_core/gpu_asynch.h 2
-rw-r--r-- src/video_core/gpu_thread.cpp 6
-rw-r--r-- src/video_core/gpu_thread.h 7
-rw-r--r-- src/video_core/rasterizer_interface.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 28
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 32
12 files changed, 94 insertions, 19 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2605c3b42..c297bc31b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
397 } 397 }
398} 398}
399 399
400void Maxwell3D::ReleaseFences() {
401 for (const auto pair : delay_fences) {
402 const auto [addr, payload] = pair;
403 memory_manager.Write<u32>(addr, static_cast<u32>(payload));
404 }
405 delay_fences.clear();
406}
407
408void Maxwell3D::ProcessQueryGet() { 400void Maxwell3D::ProcessQueryGet() {
409 // TODO(Subv): Support the other query units. 401 // TODO(Subv): Support the other query units.
410 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 402 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {
412 404
413 switch (regs.query.query_get.operation) { 405 switch (regs.query.query_get.operation) {
414 case Regs::QueryOperation::Release: { 406 case Regs::QueryOperation::Release: {
415 rasterizer.FlushCommands();
416 rasterizer.SyncGuestHost();
417 const u64 result = regs.query.query_sequence; 407 const u64 result = regs.query.query_sequence;
418 delay_fences.emplace_back(regs.query.QueryAddress(), result); 408 if (regs.query.query_get.fence == 1) {
409 rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
410 } else {
411 StampQueryResult(result, regs.query.query_get.short_query == 0);
412 }
419 break; 413 break;
420 } 414 }
421 case Regs::QueryOperation::Acquire: 415 case Regs::QueryOperation::Acquire:
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0a93827ec..59d5752d2 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1427,8 +1427,6 @@ public:
1427 Tables tables{}; 1427 Tables tables{};
1428 } dirty; 1428 } dirty;
1429 1429
1430 void ReleaseFences();
1431
1432private: 1430private:
1433 void InitializeRegisterDefaults(); 1431 void InitializeRegisterDefaults();
1434 1432
@@ -1469,8 +1467,6 @@ private:
1469 1467
1470 std::array<u8, Regs::NUM_REGS> dirty_pointers{}; 1468 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1471 1469
1472 std::vector<std::pair<GPUVAddr, u64>> delay_fences;
1473
1474 /// Retrieves information about a specific TIC entry from the TIC buffer. 1470 /// Retrieves information about a specific TIC entry from the TIC buffer.
1475 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1471 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1476 1472
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 71ddfbd26..d05b6a9d2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
147} 147}
148 148
149void GPU::OnCommandListEnd() { 149void GPU::OnCommandListEnd() {
150 maxwell_3d->ReleaseFences(); 150 renderer.Rasterizer().ReleaseFences();
151} 151}
152// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence 152// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
153// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. 153// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b88445634..fa9991c87 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -157,7 +157,7 @@ public:
157 157
158 void FlushCommands(); 158 void FlushCommands();
159 void SyncGuestHost(); 159 void SyncGuestHost();
160 void OnCommandListEnd(); 160 virtual void OnCommandListEnd();
161 161
162 /// Returns a reference to the Maxwell3D GPU engine. 162 /// Returns a reference to the Maxwell3D GPU engine.
163 Engines::Maxwell3D& Maxwell3D(); 163 Engines::Maxwell3D& Maxwell3D();
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 20e73a37e..53305ab43 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
52 gpu_thread.WaitIdle(); 52 gpu_thread.WaitIdle();
53} 53}
54 54
55void GPUAsynch::OnCommandListEnd() {
56 gpu_thread.OnCommandListEnd();
57}
58
55} // namespace VideoCommon 59} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 03fd0eef0..517658612 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
33 void WaitIdle() const override; 33 void WaitIdle() const override;
34 34
35 void OnCommandListEnd() override;
36
35protected: 37protected:
36 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; 38 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
37 39
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1994d3bb4..251a9d911 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
37 dma_pusher.DispatchCalls(); 37 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 39 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
40 } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
41 renderer.Rasterizer().ReleaseFences();
40 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 42 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
41 renderer.Rasterizer().FlushRegion(data->addr, data->size); 43 renderer.Rasterizer().FlushRegion(data->addr, data->size);
42 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 44 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {
95 } 97 }
96} 98}
97 99
100void ThreadManager::OnCommandListEnd() {
101 PushCommand(OnCommandListEndCommand());
102}
103
98u64 ThreadManager::PushCommand(CommandData&& command_data) { 104u64 ThreadManager::PushCommand(CommandData&& command_data) {
99 const u64 fence{++state.last_fence}; 105 const u64 fence{++state.last_fence};
100 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 106 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cd74ad330..9d0877921 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {
70 u64 size; 70 u64 size;
71}; 71};
72 72
73/// Command to signal to the GPU thread that processing has ended
74struct OnCommandListEndCommand final {};
75
73using CommandData = 76using CommandData =
74 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, 77 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
75 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; 78 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
76 79
77struct CommandDataContainer { 80struct CommandDataContainer {
78 CommandDataContainer() = default; 81 CommandDataContainer() = default;
@@ -122,6 +125,8 @@ public:
122 // Wait until the gpu thread is idle. 125 // Wait until the gpu thread is idle.
123 void WaitIdle() const; 126 void WaitIdle() const;
124 127
128 void OnCommandListEnd();
129
125private: 130private:
126 /// Pushes a command to be executed by the GPU thread 131 /// Pushes a command to be executed by the GPU thread
127 u64 PushCommand(CommandData&& command_data); 132 u64 PushCommand(CommandData&& command_data);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0d05a3fc7..72f65b166 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,6 +49,14 @@ public:
49 /// Records a GPU query and caches it 49 /// Records a GPU query and caches it
50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
51 51
52 virtual void SignalFence(GPUVAddr addr, u32 value) {
53
54 }
55
56 virtual void ReleaseFences() {
57
58 }
59
52 /// Notify rasterizer that all caches should be flushed to Switch memory 60 /// Notify rasterizer that all caches should be flushed to Switch memory
53 virtual void FlushAll() = 0; 61 virtual void FlushAll() = 0;
54 62
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 988eaeaa5..93bb33e8c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {
676 buffer_cache.SyncGuestHost(); 676 buffer_cache.SyncGuestHost();
677} 677}
678 678
679void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) {
680 if (!fences.empty()) {
681 const std::pair<GPUVAddr, u32>& current_fence = fences.front();
682 const auto [address, payload] = current_fence;
683 texture_cache.PopAsyncFlushes();
684 auto& gpu{system.GPU()};
685 auto& memory_manager{gpu.MemoryManager()};
686 memory_manager.Write<u32>(address, payload);
687 fences.pop_front();
688 }
689 fences.emplace_back(addr, value);
690 texture_cache.CommitAsyncFlushes();
691 FlushCommands();
692 SyncGuestHost();
693}
694
695void RasterizerOpenGL::ReleaseFences() {
696 while (!fences.empty()) {
697 const std::pair<GPUVAddr, u32>& current_fence = fences.front();
698 const auto [address, payload] = current_fence;
699 texture_cache.PopAsyncFlushes();
700 auto& gpu{system.GPU()};
701 auto& memory_manager{gpu.MemoryManager()};
702 memory_manager.Write<u32>(address, payload);
703 fences.pop_front();
704 }
705}
706
679void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 707void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
680 if (Settings::IsGPULevelExtreme()) { 708 if (Settings::IsGPULevelExtreme()) {
681 FlushRegion(addr, size); 709 FlushRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a870024c6..486a154ad 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -69,6 +69,8 @@ public:
69 void InvalidateRegion(VAddr addr, u64 size) override; 69 void InvalidateRegion(VAddr addr, u64 size) override;
70 void OnCPUWrite(VAddr addr, u64 size) override; 70 void OnCPUWrite(VAddr addr, u64 size) override;
71 void SyncGuestHost() override; 71 void SyncGuestHost() override;
72 void SignalFence(GPUVAddr addr, u32 value) override;
73 void ReleaseFences() override;
72 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 74 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
73 void FlushCommands() override; 75 void FlushCommands() override;
74 void TickFrame() override; 76 void TickFrame() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8c8390bb..6629c59ed 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -238,7 +238,7 @@ public:
238 surface->MarkAsRenderTarget(false, NO_RT); 238 surface->MarkAsRenderTarget(false, NO_RT);
239 const auto& cr_params = surface->GetSurfaceParams(); 239 const auto& cr_params = surface->GetSurfaceParams();
240 if (!cr_params.is_tiled) { 240 if (!cr_params.is_tiled) {
241 FlushSurface(surface); 241 AsyncFlushSurface(surface);
242 } 242 }
243 } 243 }
244 render_targets[index].target = surface_view.first; 244 render_targets[index].target = surface_view.first;
@@ -317,6 +317,26 @@ public:
317 return ++ticks; 317 return ++ticks;
318 } 318 }
319 319
320 void CommitAsyncFlushes() {
321 commited_flushes.push_back(uncommited_flushes);
322 uncommited_flushes.reset();
323 }
324
325 void PopAsyncFlushes() {
326 if (commited_flushes.empty()) {
327 return;
328 }
329 auto& flush_list = commited_flushes.front();
330 if (!flush_list) {
331 commited_flushes.pop_front();
332 return;
333 }
334 for (TSurface& surface : *flush_list) {
335 FlushSurface(surface);
336 }
337 commited_flushes.pop_front();
338 }
339
320protected: 340protected:
321 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 341 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
322 bool is_astc_supported) 342 bool is_astc_supported)
@@ -1152,6 +1172,13 @@ private:
1152 TView view; 1172 TView view;
1153 }; 1173 };
1154 1174
1175 void AsyncFlushSurface(TSurface& surface) {
1176 if (!uncommited_flushes) {
1177 uncommited_flushes = std::make_shared<std::list<TSurface>>();
1178 }
1179 uncommited_flushes->push_back(surface);
1180 }
1181
1155 VideoCore::RasterizerInterface& rasterizer; 1182 VideoCore::RasterizerInterface& rasterizer;
1156 1183
1157 FormatLookupTable format_lookup_table; 1184 FormatLookupTable format_lookup_table;
@@ -1198,6 +1225,9 @@ private:
1198 1225
1199 std::list<TSurface> marked_for_unregister; 1226 std::list<TSurface> marked_for_unregister;
1200 1227
1228 std::shared_ptr<std::list<TSurface>> uncommited_flushes{};
1229 std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes;
1230
1201 StagingCache staging_cache; 1231 StagingCache staging_cache;
1202 std::recursive_mutex mutex; 1232 std::recursive_mutex mutex;
1203}; 1233};