diff options
| -rw-r--r-- | src/core/core.cpp | 2 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | 33 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/interface.cpp | 4 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.cpp | 4 | ||||
| -rw-r--r-- | src/core/hle/service/nvflinger/nvflinger.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 13 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 6 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/gpu_asynch.h | 1 | ||||
| -rw-r--r-- | src/video_core/gpu_synch.h | 1 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/gpu_thread.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 5 |
16 files changed, 67 insertions, 52 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index 4d0ac72a5..ddc767e30 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -256,6 +256,8 @@ struct System::Impl { | |||
| 256 | is_powered_on = false; | 256 | is_powered_on = false; |
| 257 | exit_lock = false; | 257 | exit_lock = false; |
| 258 | 258 | ||
| 259 | gpu_core->WaitIdle(); | ||
| 260 | |||
| 259 | // Shutdown emulation session | 261 | // Shutdown emulation session |
| 260 | renderer.reset(); | 262 | renderer.reset(); |
| 261 | GDBStub::Shutdown(); | 263 | GDBStub::Shutdown(); |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 8 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | 9 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvmap.h" | 10 | #include "core/hle/service/nvdrv/devices/nvmap.h" |
| 10 | #include "core/perf_stats.h" | 11 | #include "core/perf_stats.h" |
| @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 38 | transform, crop_rect}; | 39 | transform, crop_rect}; |
| 39 | 40 | ||
| 40 | system.GetPerfStats().EndGameFrame(); | 41 | system.GetPerfStats().EndGameFrame(); |
| 42 | system.GetPerfStats().EndSystemFrame(); | ||
| 41 | system.GPU().SwapBuffers(&framebuffer); | 43 | system.GPU().SwapBuffers(&framebuffer); |
| 44 | system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||
| 45 | system.GetPerfStats().BeginSystemFrame(); | ||
| 42 | } | 46 | } |
| 43 | 47 | ||
| 44 | } // namespace Service::Nvidia::Devices | 48 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index eb88fee1b..b27ee0502 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 63 | return NvResult::BadParameter; | 63 | return NvResult::BadParameter; |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | u32 event_id = params.value & 0x00FF; | ||
| 67 | |||
| 68 | if (event_id >= MaxNvEvents) { | ||
| 69 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 70 | return NvResult::BadParameter; | ||
| 71 | } | ||
| 72 | |||
| 73 | auto event = events_interface.events[event_id]; | ||
| 66 | auto& gpu = system.GPU(); | 74 | auto& gpu = system.GPU(); |
| 67 | // This is mostly to take into account unimplemented features. As synced | 75 | // This is mostly to take into account unimplemented features. As synced |
| 68 | // gpu is always synced. | 76 | // gpu is always synced. |
| 69 | if (!gpu.IsAsync()) { | 77 | if (!gpu.IsAsync()) { |
| 78 | event.writable->Signal(); | ||
| 70 | return NvResult::Success; | 79 | return NvResult::Success; |
| 71 | } | 80 | } |
| 72 | auto lock = gpu.LockSync(); | 81 | auto lock = gpu.LockSync(); |
| 73 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | 82 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); |
| 74 | const s32 diff = current_syncpoint_value - params.threshold; | 83 | const s32 diff = current_syncpoint_value - params.threshold; |
| 75 | if (diff >= 0) { | 84 | if (diff >= 0) { |
| 85 | event.writable->Signal(); | ||
| 76 | params.value = current_syncpoint_value; | 86 | params.value = current_syncpoint_value; |
| 77 | std::memcpy(output.data(), &params, sizeof(params)); | 87 | std::memcpy(output.data(), &params, sizeof(params)); |
| 78 | return NvResult::Success; | 88 | return NvResult::Success; |
| @@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 88 | return NvResult::Timeout; | 98 | return NvResult::Timeout; |
| 89 | } | 99 | } |
| 90 | 100 | ||
| 91 | u32 event_id; | ||
| 92 | if (is_async) { | ||
| 93 | event_id = params.value & 0x00FF; | ||
| 94 | if (event_id >= MaxNvEvents) { | ||
| 95 | std::memcpy(output.data(), &params, sizeof(params)); | ||
| 96 | return NvResult::BadParameter; | ||
| 97 | } | ||
| 98 | } else { | ||
| 99 | if (ctrl.fresh_call) { | ||
| 100 | const auto result = events_interface.GetFreeEvent(); | ||
| 101 | if (result) { | ||
| 102 | event_id = *result; | ||
| 103 | } else { | ||
| 104 | LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||
| 105 | event_id = params.value & 0x00FF; | ||
| 106 | } | ||
| 107 | } else { | ||
| 108 | event_id = ctrl.event_id; | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | EventState status = events_interface.status[event_id]; | 101 | EventState status = events_interface.status[event_id]; |
| 113 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | 102 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { |
| 114 | events_interface.SetEventStatus(event_id, EventState::Waiting); | 103 | events_interface.SetEventStatus(event_id, EventState::Waiting); |
| @@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 120 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | 109 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; |
| 121 | } | 110 | } |
| 122 | params.value |= event_id; | 111 | params.value |= event_id; |
| 123 | events_interface.events[event_id].writable->Clear(); | 112 | event.writable->Clear(); |
| 124 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | 113 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); |
| 125 | if (!is_async && ctrl.fresh_call) { | 114 | if (!is_async && ctrl.fresh_call) { |
| 126 | ctrl.must_delay = true; | 115 | ctrl.must_delay = true; |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | |||
| 134 | IPC::ResponseBuilder rb{ctx, 3, 1}; | 134 | IPC::ResponseBuilder rb{ctx, 3, 1}; |
| 135 | rb.Push(RESULT_SUCCESS); | 135 | rb.Push(RESULT_SUCCESS); |
| 136 | if (event_id < MaxNvEvents) { | 136 | if (event_id < MaxNvEvents) { |
| 137 | rb.PushCopyObjects(nvdrv->GetEvent(event_id)); | 137 | auto event = nvdrv->GetEvent(event_id); |
| 138 | event->Clear(); | ||
| 139 | rb.PushCopyObjects(event); | ||
| 138 | rb.Push<u32>(NvResult::Success); | 140 | rb.Push<u32>(NvResult::Success); |
| 139 | } else { | 141 | } else { |
| 140 | rb.Push<u32>(0); | 142 | rb.Push<u32>(0); |
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { | |||
| 40 | auto& kernel = system.Kernel(); | 40 | auto& kernel = system.Kernel(); |
| 41 | for (u32 i = 0; i < MaxNvEvents; i++) { | 41 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); | 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( | 43 | events_interface.events[i] = |
| 44 | kernel, Kernel::ResetType::Automatic, event_label); | 44 | Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); |
| 45 | events_interface.status[i] = EventState::Free; | 45 | events_interface.status[i] = EventState::Free; |
| 46 | events_interface.registered[i] = false; | 46 | events_interface.registered[i] = false; |
| 47 | } | 47 | } |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 368f83f6c..cc9522aad 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -187,14 +187,18 @@ void NVFlinger::Compose() { | |||
| 187 | MicroProfileFlip(); | 187 | MicroProfileFlip(); |
| 188 | 188 | ||
| 189 | if (!buffer) { | 189 | if (!buffer) { |
| 190 | // There was no queued buffer to draw, render previous frame | ||
| 191 | system.GetPerfStats().EndGameFrame(); | ||
| 192 | system.GPU().SwapBuffers({}); | ||
| 193 | continue; | 190 | continue; |
| 194 | } | 191 | } |
| 195 | 192 | ||
| 196 | const auto& igbp_buffer = buffer->get().igbp_buffer; | 193 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 197 | 194 | ||
| 195 | const auto& gpu = system.GPU(); | ||
| 196 | const auto& multi_fence = buffer->get().multi_fence; | ||
| 197 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | ||
| 198 | const auto& fence = multi_fence.fences[fence_id]; | ||
| 199 | gpu.WaitFence(fence.id, fence.value); | ||
| 200 | } | ||
| 201 | |||
| 198 | // Now send the buffer to the GPU for drawing. | 202 | // Now send the buffer to the GPU for drawing. |
| 199 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 203 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
| 200 | // on which display we're drawing (Default, Internal, External, etc) | 204 | // on which display we're drawing (Default, Internal, External, etc) |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index fbb9105d6..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| @@ -17,6 +18,8 @@ | |||
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | 20 | ||
| 21 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 22 | |||
| 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 23 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | : system{system}, renderer{renderer}, is_async{is_async} { | 24 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | auto& rasterizer{renderer.Rasterizer()}; | 25 | auto& rasterizer{renderer.Rasterizer()}; |
| @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 63 | return *dma_pusher; | 66 | return *dma_pusher; |
| 64 | } | 67 | } |
| 65 | 68 | ||
| 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | ||
| 70 | // Synced GPU, is always in sync | ||
| 71 | if (!is_async) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 75 | while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 67 | syncpoints[syncpoint_id]++; | 80 | syncpoints[syncpoint_id]++; |
| 68 | std::lock_guard lock{sync_mutex}; | 81 | std::lock_guard lock{sync_mutex}; |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -177,6 +177,12 @@ public: | |||
| 177 | /// Returns a reference to the GPU DMA pusher. | 177 | /// Returns a reference to the GPU DMA pusher. |
| 178 | Tegra::DmaPusher& DmaPusher(); | 178 | Tegra::DmaPusher& DmaPusher(); |
| 179 | 179 | ||
| 180 | // Waits for the GPU to finish working | ||
| 181 | virtual void WaitIdle() const = 0; | ||
| 182 | |||
| 183 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||
| 184 | void WaitFence(u32 syncpoint_id, u32 value) const; | ||
| 185 | |||
| 180 | void IncrementSyncPoint(u32 syncpoint_id); | 186 | void IncrementSyncPoint(u32 syncpoint_id); |
| 181 | 187 | ||
| 182 | u32 GetSyncpointValue(u32 syncpoint_id) const; | 188 | u32 GetSyncpointValue(u32 syncpoint_id) const; |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con | |||
| 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void GPUAsynch::WaitIdle() const { | ||
| 48 | gpu_thread.WaitIdle(); | ||
| 49 | } | ||
| 50 | |||
| 47 | } // namespace VideoCommon | 51 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -25,6 +25,7 @@ public: | |||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void WaitIdle() const override; | ||
| 28 | 29 | ||
| 29 | protected: | 30 | protected: |
| 30 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 31 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -24,6 +24,7 @@ public: | |||
| 24 | void FlushRegion(CacheAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void WaitIdle() const override {} | ||
| 27 | 28 | ||
| 28 | protected: | 29 | protected: |
| 29 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | 30 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/core_timing_util.h" | ||
| 10 | #include "core/frontend/scope_acquire_window_context.h" | 8 | #include "core/frontend/scope_acquire_window_context.h" |
| 11 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { | |||
| 68 | 66 | ||
| 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | 67 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { |
| 70 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; |
| 71 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 72 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 73 | } | 69 | } |
| 74 | 70 | ||
| 75 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 71 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 76 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | 72 | PushCommand(SubmitListCommand(std::move(entries))); |
| 77 | const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; | ||
| 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | ||
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 75 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 96 | InvalidateRegion(addr, size); | 90 | InvalidateRegion(addr, size); |
| 97 | } | 91 | } |
| 98 | 92 | ||
| 93 | void ThreadManager::WaitIdle() const { | ||
| 94 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 100 | const u64 fence{++state.last_fence}; | 99 | const u64 fence{++state.last_fence}; |
| 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 102 | return fence; | 101 | return fence; |
| 103 | } | 102 | } |
| 104 | 103 | ||
| 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 106 | void SynchState::WaitForSynchronization(u64 fence) { | ||
| 107 | while (signaled_fence.load() < fence) | ||
| 108 | ; | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::GPUThread | 104 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -21,9 +21,6 @@ class DmaPusher; | |||
| 21 | 21 | ||
| 22 | namespace Core { | 22 | namespace Core { |
| 23 | class System; | 23 | class System; |
| 24 | namespace Timing { | ||
| 25 | struct EventType; | ||
| 26 | } // namespace Timing | ||
| 27 | } // namespace Core | 24 | } // namespace Core |
| 28 | 25 | ||
| 29 | namespace VideoCommon::GPUThread { | 26 | namespace VideoCommon::GPUThread { |
| @@ -89,8 +86,6 @@ struct CommandDataContainer { | |||
| 89 | struct SynchState final { | 86 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 87 | std::atomic_bool is_running{true}; |
| 91 | 88 | ||
| 92 | void WaitForSynchronization(u64 fence); | ||
| 93 | |||
| 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 89 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 95 | CommandQueue queue; | 90 | CommandQueue queue; |
| 96 | u64 last_fence{}; | 91 | u64 last_fence{}; |
| @@ -121,6 +116,9 @@ public: | |||
| 121 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 116 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 122 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | 117 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); |
| 123 | 118 | ||
| 119 | // Wait until the gpu thread is idle. | ||
| 120 | void WaitIdle() const; | ||
| 121 | |||
| 124 | private: | 122 | private: |
| 125 | /// Pushes a command to be executed by the GPU thread | 123 | /// Pushes a command to be executed by the GPU thread |
| 126 | u64 PushCommand(CommandData&& command_data); | 124 | u64 PushCommand(CommandData&& command_data); |
| @@ -128,7 +126,6 @@ private: | |||
| 128 | private: | 126 | private: |
| 129 | SynchState state; | 127 | SynchState state; |
| 130 | Core::System& system; | 128 | Core::System& system; |
| 131 | Core::Timing::EventType* synchronization_event{}; | ||
| 132 | std::thread thread; | 129 | std::thread thread; |
| 133 | std::thread::id thread_id; | 130 | std::thread::id thread_id; |
| 134 | }; | 131 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..cbcf81414 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | |||
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 351 | std::lock_guard lock{pages_mutex}; | ||
| 351 | const u64 page_start{addr >> Memory::PAGE_BITS}; | 352 | const u64 page_start{addr >> Memory::PAGE_BITS}; |
| 352 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | 353 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |
| 353 | 354 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <map> | 10 | #include <map> |
| 11 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 12 | #include <optional> | 13 | #include <optional> |
| 13 | #include <tuple> | 14 | #include <tuple> |
| 14 | #include <utility> | 15 | #include <utility> |
| @@ -230,6 +231,8 @@ private: | |||
| 230 | 231 | ||
| 231 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 232 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 232 | CachedPageMap cached_pages; | 233 | CachedPageMap cached_pages; |
| 234 | |||
| 235 | std::mutex pages_mutex; | ||
| 233 | }; | 236 | }; |
| 234 | 237 | ||
| 235 | } // namespace OpenGL | 238 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 105 | system.GetPerfStats().EndSystemFrame(); | ||
| 106 | |||
| 107 | // Maintain the rasterizer's state as a priority | 105 | // Maintain the rasterizer's state as a priority |
| 108 | OpenGLState prev_state = OpenGLState::GetCurState(); | 106 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 109 | state.AllDirty(); | 107 | state.AllDirty(); |
| @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 135 | 133 | ||
| 136 | render_window.PollEvents(); | 134 | render_window.PollEvents(); |
| 137 | 135 | ||
| 138 | system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||
| 139 | system.GetPerfStats().BeginSystemFrame(); | ||
| 140 | |||
| 141 | // Restore the rasterizer state | 136 | // Restore the rasterizer state |
| 142 | prev_state.AllDirty(); | 137 | prev_state.AllDirty(); |
| 143 | prev_state.Apply(); | 138 | prev_state.Apply(); |