diff options
| author | 2020-02-20 11:55:32 -0400 | |
|---|---|---|
| committer | 2020-04-22 11:36:17 -0400 | |
| commit | 1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434 (patch) | |
| tree | 123d3f3e906e1af35c4bbced2d9029bc93fb4653 /src | |
| parent | FenceManager: Manage syncpoints and rename fences to semaphores. (diff) | |
| download | yuzu-1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434.tar.gz yuzu-1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434.tar.xz yuzu-1fb516cd979ed0dbf8fa7cb4f6a334932dfb6434.zip | |
GPU: Implement Flush Requests for Async mode.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/gpu.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 21 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 |
6 files changed, 70 insertions, 8 deletions
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 19d3bd305..85a6c7bb5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 125 | return true; | 125 | return true; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) { | ||
| 129 | std::unique_lock lck{flush_request_mutex}; | ||
| 130 | const u64 fence = ++last_flush_fence; | ||
| 131 | flush_requests.emplace_back(fence, addr, size); | ||
| 132 | return fence; | ||
| 133 | } | ||
| 134 | |||
| 135 | void GPU::TickWork() { | ||
| 136 | std::unique_lock lck{flush_request_mutex}; | ||
| 137 | while (!flush_requests.empty()) { | ||
| 138 | auto& request = flush_requests.front(); | ||
| 139 | const u64 fence = request.fence; | ||
| 140 | const CacheAddr addr = request.addr; | ||
| 141 | const std::size_t size = request.size; | ||
| 142 | flush_requests.pop_front(); | ||
| 143 | flush_request_mutex.unlock(); | ||
| 144 | renderer->Rasterizer().FlushRegion(addr, size); | ||
| 145 | current_flush_fence.store(fence); | ||
| 146 | flush_request_mutex.lock(); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 128 | u64 GPU::GetTicks() const { | 150 | u64 GPU::GetTicks() const { |
| 129 | // These values were reverse engineered by fincs from NVN | 151 | // These values were reverse engineered by fincs from NVN |
| 130 | // The gpu clock is reported in units of 385/625 nanoseconds | 152 | // The gpu clock is reported in units of 385/625 nanoseconds |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fa9991c87..943a5b110 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -159,6 +159,14 @@ public: | |||
| 159 | void SyncGuestHost(); | 159 | void SyncGuestHost(); |
| 160 | virtual void OnCommandListEnd(); | 160 | virtual void OnCommandListEnd(); |
| 161 | 161 | ||
| 162 | u64 RequestFlush(CacheAddr addr, std::size_t size); | ||
| 163 | |||
| 164 | u64 CurrentFlushRequestFence() const { | ||
| 165 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 166 | } | ||
| 167 | |||
| 168 | void TickWork(); | ||
| 169 | |||
| 162 | /// Returns a reference to the Maxwell3D GPU engine. | 170 | /// Returns a reference to the Maxwell3D GPU engine. |
| 163 | Engines::Maxwell3D& Maxwell3D(); | 171 | Engines::Maxwell3D& Maxwell3D(); |
| 164 | 172 | ||
| @@ -327,6 +335,19 @@ private: | |||
| 327 | 335 | ||
| 328 | std::condition_variable sync_cv; | 336 | std::condition_variable sync_cv; |
| 329 | 337 | ||
| 338 | struct FlushRequest { | ||
| 339 | FlushRequest(u64 fence, CacheAddr addr, std::size_t size) | ||
| 340 | : fence{fence}, addr{addr}, size{size} {} | ||
| 341 | u64 fence; | ||
| 342 | CacheAddr addr; | ||
| 343 | std::size_t size; | ||
| 344 | }; | ||
| 345 | |||
| 346 | std::list<FlushRequest> flush_requests; | ||
| 347 | std::atomic<u64> current_flush_fence{}; | ||
| 348 | u64 last_flush_fence{}; | ||
| 349 | std::mutex flush_request_mutex; | ||
| 350 | |||
| 330 | const bool is_async; | 351 | const bool is_async; |
| 331 | }; | 352 | }; |
| 332 | 353 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3e2be00e9..9460364a3 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -15,8 +15,9 @@ | |||
| 15 | namespace VideoCommon::GPUThread { | 15 | namespace VideoCommon::GPUThread { |
| 16 | 16 | ||
| 17 | /// Runs the GPU thread | 17 | /// Runs the GPU thread |
| 18 | static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | 18 | static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, |
| 19 | Tegra::DmaPusher& dma_pusher, SynchState& state) { | 19 | Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, |
| 20 | SynchState& state) { | ||
| 20 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 21 | 22 | ||
| 22 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| @@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | |||
| 40 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 41 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 41 | } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | 42 | } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { |
| 42 | renderer.Rasterizer().ReleaseFences(); | 43 | renderer.Rasterizer().ReleaseFences(); |
| 44 | } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { | ||
| 45 | system.GPU().TickWork(); | ||
| 43 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 46 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 44 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 47 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 45 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 48 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| @@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() { | |||
| 68 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 71 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 69 | Core::Frontend::GraphicsContext& context, | 72 | Core::Frontend::GraphicsContext& context, |
| 70 | Tegra::DmaPusher& dma_pusher) { | 73 | Tegra::DmaPusher& dma_pusher) { |
| 71 | thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), | 74 | thread = std::thread{RunThread, std::ref(system), std::ref(renderer), |
| 72 | std::ref(state)}; | 75 | std::ref(context), std::ref(dma_pusher), std::ref(state)}; |
| 73 | } | 76 | } |
| 74 | 77 | ||
| 75 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 78 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| @@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 85 | return; | 88 | return; |
| 86 | } | 89 | } |
| 87 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | 90 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { |
| 88 | u64 fence = PushCommand(FlushRegionCommand(addr, size)); | 91 | auto& gpu = system.GPU(); |
| 89 | while (fence > state.signaled_fence.load(std::memory_order_relaxed)) { | 92 | u64 fence = gpu.RequestFlush(addr, size); |
| 93 | PushCommand(GPUTickCommand()); | ||
| 94 | while (fence > gpu.CurrentFlushRequestFence()) { | ||
| 90 | } | 95 | } |
| 91 | } | 96 | } |
| 92 | } | 97 | } |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 9d0877921..5a28335d6 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final { | |||
| 70 | u64 size; | 70 | u64 size; |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| 73 | /// Command to signal to the GPU thread that processing has ended | 73 | /// Command called within the gpu, to schedule actions after a command list end |
| 74 | struct OnCommandListEndCommand final {}; | 74 | struct OnCommandListEndCommand final {}; |
| 75 | 75 | ||
| 76 | /// Command to make the gpu look into pending requests | ||
| 77 | struct GPUTickCommand final {}; | ||
| 78 | |||
| 76 | using CommandData = | 79 | using CommandData = |
| 77 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | 80 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
| 78 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>; | 81 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, |
| 82 | GPUTickCommand>; | ||
| 79 | 83 | ||
| 80 | struct CommandDataContainer { | 84 | struct CommandDataContainer { |
| 81 | CommandDataContainer() = default; | 85 | CommandDataContainer() = default; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e52e5961f..fbd81b895 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 601 | EndTransformFeedback(); | 601 | EndTransformFeedback(); |
| 602 | 602 | ||
| 603 | ++num_queued_commands; | 603 | ++num_queued_commands; |
| 604 | |||
| 605 | system.GPU().TickWork(); | ||
| 604 | } | 606 | } |
| 605 | 607 | ||
| 606 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 608 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 628 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 630 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 629 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 631 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 630 | ++num_queued_commands; | 632 | ++num_queued_commands; |
| 633 | system.GPU().TickWork(); | ||
| 631 | } | 634 | } |
| 632 | 635 | ||
| 633 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 636 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 652 | } | 655 | } |
| 653 | 656 | ||
| 654 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | 657 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { |
| 658 | if (!Settings::IsGPULevelExtreme()) { | ||
| 659 | return buffer_cache.MustFlushRegion(addr, size); | ||
| 660 | } | ||
| 655 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | 661 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); |
| 656 | } | 662 | } |
| 657 | 663 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 507262c8f..926ecf38e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 365 | }); | 365 | }); |
| 366 | 366 | ||
| 367 | EndTransformFeedback(); | 367 | EndTransformFeedback(); |
| 368 | |||
| 369 | system.GPU().TickWork(); | ||
| 368 | } | 370 | } |
| 369 | 371 | ||
| 370 | void RasterizerVulkan::Clear() { | 372 | void RasterizerVulkan::Clear() { |
| @@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 492 | descriptor_set, {}); | 494 | descriptor_set, {}); |
| 493 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); | 495 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); |
| 494 | }); | 496 | }); |
| 497 | |||
| 498 | system.GPU().TickWork(); | ||
| 495 | } | 499 | } |
| 496 | 500 | ||
| 497 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | 501 | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { |