diff options
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 4 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.h | 2 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 7 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 32 |
12 files changed, 94 insertions, 19 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2605c3b42..c297bc31b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 397 | } | 397 | } |
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | void Maxwell3D::ReleaseFences() { | ||
| 401 | for (const auto pair : delay_fences) { | ||
| 402 | const auto [addr, payload] = pair; | ||
| 403 | memory_manager.Write<u32>(addr, static_cast<u32>(payload)); | ||
| 404 | } | ||
| 405 | delay_fences.clear(); | ||
| 406 | } | ||
| 407 | |||
| 408 | void Maxwell3D::ProcessQueryGet() { | 400 | void Maxwell3D::ProcessQueryGet() { |
| 409 | // TODO(Subv): Support the other query units. | 401 | // TODO(Subv): Support the other query units. |
| 410 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 402 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |
| @@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 412 | 404 | ||
| 413 | switch (regs.query.query_get.operation) { | 405 | switch (regs.query.query_get.operation) { |
| 414 | case Regs::QueryOperation::Release: { | 406 | case Regs::QueryOperation::Release: { |
| 415 | rasterizer.FlushCommands(); | ||
| 416 | rasterizer.SyncGuestHost(); | ||
| 417 | const u64 result = regs.query.query_sequence; | 407 | const u64 result = regs.query.query_sequence; |
| 418 | delay_fences.emplace_back(regs.query.QueryAddress(), result); | 408 | if (regs.query.query_get.fence == 1) { |
| 409 | rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result)); | ||
| 410 | } else { | ||
| 411 | StampQueryResult(result, regs.query.query_get.short_query == 0); | ||
| 412 | } | ||
| 419 | break; | 413 | break; |
| 420 | } | 414 | } |
| 421 | case Regs::QueryOperation::Acquire: | 415 | case Regs::QueryOperation::Acquire: |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a93827ec..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1427,8 +1427,6 @@ public: | |||
| 1427 | Tables tables{}; | 1427 | Tables tables{}; |
| 1428 | } dirty; | 1428 | } dirty; |
| 1429 | 1429 | ||
| 1430 | void ReleaseFences(); | ||
| 1431 | |||
| 1432 | private: | 1430 | private: |
| 1433 | void InitializeRegisterDefaults(); | 1431 | void InitializeRegisterDefaults(); |
| 1434 | 1432 | ||
| @@ -1469,8 +1467,6 @@ private: | |||
| 1469 | 1467 | ||
| 1470 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | 1468 | std::array<u8, Regs::NUM_REGS> dirty_pointers{}; |
| 1471 | 1469 | ||
| 1472 | std::vector<std::pair<GPUVAddr, u64>> delay_fences; | ||
| 1473 | |||
| 1474 | /// Retrieves information about a specific TIC entry from the TIC buffer. | 1470 | /// Retrieves information about a specific TIC entry from the TIC buffer. |
| 1475 | Texture::TICEntry GetTICEntry(u32 tic_index) const; | 1471 | Texture::TICEntry GetTICEntry(u32 tic_index) const; |
| 1476 | 1472 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 71ddfbd26..d05b6a9d2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -147,7 +147,7 @@ void GPU::SyncGuestHost() { | |||
| 147 | } | 147 | } |
| 148 | 148 | ||
| 149 | void GPU::OnCommandListEnd() { | 149 | void GPU::OnCommandListEnd() { |
| 150 | maxwell_3d->ReleaseFences(); | 150 | renderer.Rasterizer().ReleaseFences(); |
| 151 | } | 151 | } |
| 152 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | 152 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence |
| 153 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. | 153 | // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b88445634..fa9991c87 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -157,7 +157,7 @@ public: | |||
| 157 | 157 | ||
| 158 | void FlushCommands(); | 158 | void FlushCommands(); |
| 159 | void SyncGuestHost(); | 159 | void SyncGuestHost(); |
| 160 | void OnCommandListEnd(); | 160 | virtual void OnCommandListEnd(); |
| 161 | 161 | ||
| 162 | /// Returns a reference to the Maxwell3D GPU engine. | 162 | /// Returns a reference to the Maxwell3D GPU engine. |
| 163 | Engines::Maxwell3D& Maxwell3D(); | 163 | Engines::Maxwell3D& Maxwell3D(); |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 20e73a37e..53305ab43 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { | |||
| 52 | gpu_thread.WaitIdle(); | 52 | gpu_thread.WaitIdle(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void GPUAsynch::OnCommandListEnd() { | ||
| 56 | gpu_thread.OnCommandListEnd(); | ||
| 57 | } | ||
| 58 | |||
| 55 | } // namespace VideoCommon | 59 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 03fd0eef0..517658612 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -32,6 +32,8 @@ public: | |||
| 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 33 | void WaitIdle() const override; | 33 | void WaitIdle() const override; |
| 34 | 34 | ||
| 35 | void OnCommandListEnd() override; | ||
| 36 | |||
| 35 | protected: | 37 | protected: |
| 36 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 38 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
| 37 | 39 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1994d3bb4..251a9d911 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | |||
| 37 | dma_pusher.DispatchCalls(); | 37 | dma_pusher.DispatchCalls(); |
| 38 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 38 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 39 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 39 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 40 | } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | ||
| 41 | renderer.Rasterizer().ReleaseFences(); | ||
| 40 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 42 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 41 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 43 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 42 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 44 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| @@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const { | |||
| 95 | } | 97 | } |
| 96 | } | 98 | } |
| 97 | 99 | ||
| 100 | void ThreadManager::OnCommandListEnd() { | ||
| 101 | PushCommand(OnCommandListEndCommand()); | ||
| 102 | } | ||
| 103 | |||
| 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 104 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 99 | const u64 fence{++state.last_fence}; | 105 | const u64 fence{++state.last_fence}; |
| 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 106 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cd74ad330..9d0877921 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final { | |||
| 70 | u64 size; | 70 | u64 size; |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| 73 | /// Command to signal to the GPU thread that processing has ended | ||
| 74 | struct OnCommandListEndCommand final {}; | ||
| 75 | |||
| 73 | using CommandData = | 76 | using CommandData = |
| 74 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | 77 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
| 75 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | 78 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>; |
| 76 | 79 | ||
| 77 | struct CommandDataContainer { | 80 | struct CommandDataContainer { |
| 78 | CommandDataContainer() = default; | 81 | CommandDataContainer() = default; |
| @@ -122,6 +125,8 @@ public: | |||
| 122 | // Wait until the gpu thread is idle. | 125 | // Wait until the gpu thread is idle. |
| 123 | void WaitIdle() const; | 126 | void WaitIdle() const; |
| 124 | 127 | ||
| 128 | void OnCommandListEnd(); | ||
| 129 | |||
| 125 | private: | 130 | private: |
| 126 | /// Pushes a command to be executed by the GPU thread | 131 | /// Pushes a command to be executed by the GPU thread |
| 127 | u64 PushCommand(CommandData&& command_data); | 132 | u64 PushCommand(CommandData&& command_data); |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0d05a3fc7..72f65b166 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -49,6 +49,14 @@ public: | |||
| 49 | /// Records a GPU query and caches it | 49 | /// Records a GPU query and caches it |
| 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 51 | 51 | ||
| 52 | virtual void SignalFence(GPUVAddr addr, u32 value) { | ||
| 53 | |||
| 54 | } | ||
| 55 | |||
| 56 | virtual void ReleaseFences() { | ||
| 57 | |||
| 58 | } | ||
| 59 | |||
| 52 | /// Notify rasterizer that all caches should be flushed to Switch memory | 60 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 53 | virtual void FlushAll() = 0; | 61 | virtual void FlushAll() = 0; |
| 54 | 62 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 988eaeaa5..93bb33e8c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() { | |||
| 676 | buffer_cache.SyncGuestHost(); | 676 | buffer_cache.SyncGuestHost(); |
| 677 | } | 677 | } |
| 678 | 678 | ||
| 679 | void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) { | ||
| 680 | if (!fences.empty()) { | ||
| 681 | const std::pair<GPUVAddr, u32>& current_fence = fences.front(); | ||
| 682 | const auto [address, payload] = current_fence; | ||
| 683 | texture_cache.PopAsyncFlushes(); | ||
| 684 | auto& gpu{system.GPU()}; | ||
| 685 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 686 | memory_manager.Write<u32>(address, payload); | ||
| 687 | fences.pop_front(); | ||
| 688 | } | ||
| 689 | fences.emplace_back(addr, value); | ||
| 690 | texture_cache.CommitAsyncFlushes(); | ||
| 691 | FlushCommands(); | ||
| 692 | SyncGuestHost(); | ||
| 693 | } | ||
| 694 | |||
| 695 | void RasterizerOpenGL::ReleaseFences() { | ||
| 696 | while (!fences.empty()) { | ||
| 697 | const std::pair<GPUVAddr, u32>& current_fence = fences.front(); | ||
| 698 | const auto [address, payload] = current_fence; | ||
| 699 | texture_cache.PopAsyncFlushes(); | ||
| 700 | auto& gpu{system.GPU()}; | ||
| 701 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 702 | memory_manager.Write<u32>(address, payload); | ||
| 703 | fences.pop_front(); | ||
| 704 | } | ||
| 705 | } | ||
| 706 | |||
| 679 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 707 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 680 | if (Settings::IsGPULevelExtreme()) { | 708 | if (Settings::IsGPULevelExtreme()) { |
| 681 | FlushRegion(addr, size); | 709 | FlushRegion(addr, size); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a870024c6..486a154ad 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -69,6 +69,8 @@ public: | |||
| 69 | void InvalidateRegion(VAddr addr, u64 size) override; | 69 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 70 | void OnCPUWrite(VAddr addr, u64 size) override; | 70 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 71 | void SyncGuestHost() override; | 71 | void SyncGuestHost() override; |
| 72 | void SignalFence(GPUVAddr addr, u32 value) override; | ||
| 73 | void ReleaseFences() override; | ||
| 72 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 74 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 73 | void FlushCommands() override; | 75 | void FlushCommands() override; |
| 74 | void TickFrame() override; | 76 | void TickFrame() override; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d8c8390bb..6629c59ed 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -238,7 +238,7 @@ public: | |||
| 238 | surface->MarkAsRenderTarget(false, NO_RT); | 238 | surface->MarkAsRenderTarget(false, NO_RT); |
| 239 | const auto& cr_params = surface->GetSurfaceParams(); | 239 | const auto& cr_params = surface->GetSurfaceParams(); |
| 240 | if (!cr_params.is_tiled) { | 240 | if (!cr_params.is_tiled) { |
| 241 | FlushSurface(surface); | 241 | AsyncFlushSurface(surface); |
| 242 | } | 242 | } |
| 243 | } | 243 | } |
| 244 | render_targets[index].target = surface_view.first; | 244 | render_targets[index].target = surface_view.first; |
| @@ -317,6 +317,26 @@ public: | |||
| 317 | return ++ticks; | 317 | return ++ticks; |
| 318 | } | 318 | } |
| 319 | 319 | ||
| 320 | void CommitAsyncFlushes() { | ||
| 321 | commited_flushes.push_back(uncommited_flushes); | ||
| 322 | uncommited_flushes.reset(); | ||
| 323 | } | ||
| 324 | |||
| 325 | void PopAsyncFlushes() { | ||
| 326 | if (commited_flushes.empty()) { | ||
| 327 | return; | ||
| 328 | } | ||
| 329 | auto& flush_list = commited_flushes.front(); | ||
| 330 | if (!flush_list) { | ||
| 331 | commited_flushes.pop_front(); | ||
| 332 | return; | ||
| 333 | } | ||
| 334 | for (TSurface& surface : *flush_list) { | ||
| 335 | FlushSurface(surface); | ||
| 336 | } | ||
| 337 | commited_flushes.pop_front(); | ||
| 338 | } | ||
| 339 | |||
| 320 | protected: | 340 | protected: |
| 321 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 341 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 322 | bool is_astc_supported) | 342 | bool is_astc_supported) |
| @@ -1152,6 +1172,13 @@ private: | |||
| 1152 | TView view; | 1172 | TView view; |
| 1153 | }; | 1173 | }; |
| 1154 | 1174 | ||
| 1175 | void AsyncFlushSurface(TSurface& surface) { | ||
| 1176 | if (!uncommited_flushes) { | ||
| 1177 | uncommited_flushes = std::make_shared<std::list<TSurface>>(); | ||
| 1178 | } | ||
| 1179 | uncommited_flushes->push_back(surface); | ||
| 1180 | } | ||
| 1181 | |||
| 1155 | VideoCore::RasterizerInterface& rasterizer; | 1182 | VideoCore::RasterizerInterface& rasterizer; |
| 1156 | 1183 | ||
| 1157 | FormatLookupTable format_lookup_table; | 1184 | FormatLookupTable format_lookup_table; |
| @@ -1198,6 +1225,9 @@ private: | |||
| 1198 | 1225 | ||
| 1199 | std::list<TSurface> marked_for_unregister; | 1226 | std::list<TSurface> marked_for_unregister; |
| 1200 | 1227 | ||
| 1228 | std::shared_ptr<std::list<TSurface>> uncommited_flushes{}; | ||
| 1229 | std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes; | ||
| 1230 | |||
| 1201 | StagingCache staging_cache; | 1231 | StagingCache staging_cache; |
| 1202 | std::recursive_mutex mutex; | 1232 | std::recursive_mutex mutex; |
| 1203 | }; | 1233 | }; |