Diffstat (limited to 'src')
-rw-r--r--  src/core/core.cpp                                    |   1
-rw-r--r--  src/core/hle/kernel/k_auto_object.h                  |   4
-rw-r--r--  src/core/hle/kernel/kernel.cpp                       |  31
-rw-r--r--  src/core/hle/service/ipc_helpers.h                   |   1
-rw-r--r--  src/core/hle/service/kernel_helpers.cpp              |   3
-rw-r--r--  src/core/hle/service/mutex.cpp                       |   3
-rw-r--r--  src/core/hle/service/server_manager.cpp              |   6
-rw-r--r--  src/core/hle/service/sm/sm.cpp                       |   3
-rw-r--r--  src/core/hle/service/sm/sm_controller.cpp            |   3
-rw-r--r--  src/core/memory.cpp                                  |   2
-rw-r--r--  src/video_core/fence_manager.h                       | 143
-rw-r--r--  src/video_core/memory_manager.cpp                    |  10
-rw-r--r--  src/video_core/memory_manager.h                      |   4
-rw-r--r--  src/video_core/query_cache.h                         | 137
-rw-r--r--  src/video_core/renderer_opengl/gl_fence_manager.h    |  12
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.cpp    |  12
-rw-r--r--  src/video_core/renderer_opengl/gl_query_cache.h      |   6
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp     |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.cpp  |   1
-rw-r--r--  src/video_core/renderer_vulkan/vk_fence_manager.h    |  11
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp    |  15
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h      |   5
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp     |   6
23 files changed, 336 insertions, 85 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index ac0fb7872..06fba4ce5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -293,6 +293,7 @@ struct System::Impl {
         ASSERT(Kernel::KProcess::Initialize(main_process, system, "main",
                                             Kernel::KProcess::ProcessType::Userland, resource_limit)
                    .IsSuccess());
+        Kernel::KProcess::Register(system.Kernel(), main_process);
         kernel.MakeApplicationProcess(main_process);
         const auto [load_result, load_parameters] = app_loader->Load(*main_process, system);
         if (load_result != Loader::ResultStatus::Success) {
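
Note: every kernel-object site touched by this commit follows the same Create -> Initialize -> Register sequence. A minimal sketch of the pattern, where KObjectType is a hypothetical stand-in for KProcess, KThread, KEvent, KSession, KPort, KResourceLimit, or KSharedMemory:

    // Hedged illustration of the registration pattern; KObjectType is a
    // placeholder, not a real class in the tree.
    auto* obj = Kernel::KObjectType::Create(system.Kernel());
    obj->Initialize(/* object-specific arguments */);
    // New in this commit: tell the kernel about the object right away instead
    // of relying on later, implicit registration.
    Kernel::KObjectType::Register(system.Kernel(), obj);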
diff --git a/src/core/hle/kernel/k_auto_object.h b/src/core/hle/kernel/k_auto_object.h
index 9b71fe371..f384b1568 100644
--- a/src/core/hle/kernel/k_auto_object.h
+++ b/src/core/hle/kernel/k_auto_object.h
@@ -182,8 +182,8 @@ public:
     explicit KAutoObjectWithList(KernelCore& kernel) : KAutoObject(kernel) {}
 
     static int Compare(const KAutoObjectWithList& lhs, const KAutoObjectWithList& rhs) {
-        const u64 lid = lhs.GetId();
-        const u64 rid = rhs.GetId();
+        const uintptr_t lid = reinterpret_cast<uintptr_t>(std::addressof(lhs));
+        const uintptr_t rid = reinterpret_cast<uintptr_t>(std::addressof(rhs));
 
         if (lid < rid) {
             return -1;
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4f3366c9d..f33600ca5 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -95,7 +95,7 @@ struct KernelCore::Impl {
                                  pt_heap_region.GetSize());
         }
 
-        InitializeHackSharedMemory();
+        InitializeHackSharedMemory(kernel);
         RegisterHostThread(nullptr);
     }
 
@@ -216,10 +216,12 @@ struct KernelCore::Impl {
             auto* main_thread{Kernel::KThread::Create(system.Kernel())};
             main_thread->SetCurrentCore(core);
             ASSERT(Kernel::KThread::InitializeMainThread(system, main_thread, core).IsSuccess());
+            KThread::Register(system.Kernel(), main_thread);
 
             auto* idle_thread{Kernel::KThread::Create(system.Kernel())};
             idle_thread->SetCurrentCore(core);
             ASSERT(Kernel::KThread::InitializeIdleThread(system, idle_thread, core).IsSuccess());
+            KThread::Register(system.Kernel(), idle_thread);
 
             schedulers[i]->Initialize(main_thread, idle_thread, core);
         }
@@ -230,6 +232,7 @@ struct KernelCore::Impl {
                                      const Core::Timing::CoreTiming& core_timing) {
         system_resource_limit = KResourceLimit::Create(system.Kernel());
         system_resource_limit->Initialize(&core_timing);
+        KResourceLimit::Register(kernel, system_resource_limit);
 
         const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
         const auto total_size{sizes.first};
@@ -355,6 +358,7 @@ struct KernelCore::Impl {
             ASSERT(KThread::InitializeHighPriorityThread(system, shutdown_threads[core_id], {}, {},
                                                          core_id)
                        .IsSuccess());
+            KThread::Register(system.Kernel(), shutdown_threads[core_id]);
         }
     }
 
@@ -729,7 +733,7 @@ struct KernelCore::Impl {
         memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize());
     }
 
-    void InitializeHackSharedMemory() {
+    void InitializeHackSharedMemory(KernelCore& kernel) {
         // Setup memory regions for emulated processes
         // TODO(bunnei): These should not be hardcoded regions initialized within the kernel
         constexpr std::size_t hid_size{0x40000};
@@ -746,14 +750,23 @@ struct KernelCore::Impl {
 
         hid_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                    Svc::MemoryPermission::Read, hid_size);
+        KSharedMemory::Register(kernel, hid_shared_mem);
+
         font_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                     Svc::MemoryPermission::Read, font_size);
+        KSharedMemory::Register(kernel, font_shared_mem);
+
         irs_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                    Svc::MemoryPermission::Read, irs_size);
+        KSharedMemory::Register(kernel, irs_shared_mem);
+
         time_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                     Svc::MemoryPermission::Read, time_size);
+        KSharedMemory::Register(kernel, time_shared_mem);
+
         hidbus_shared_mem->Initialize(system.DeviceMemory(), nullptr, Svc::MemoryPermission::None,
                                       Svc::MemoryPermission::Read, hidbus_size);
+        KSharedMemory::Register(kernel, hidbus_shared_mem);
     }
 
     std::mutex registered_objects_lock;
@@ -1072,12 +1085,15 @@ static std::jthread RunHostThreadFunc(KernelCore& kernel, KProcess* process,
     // Commit the thread reservation.
     thread_reservation.Commit();
 
+    // Register the thread.
+    KThread::Register(kernel, thread);
+
     return std::jthread(
         [&kernel, thread, thread_name{std::move(thread_name)}, func{std::move(func)}] {
             // Set the thread name.
             Common::SetCurrentThreadName(thread_name.c_str());
 
-            // Register the thread.
+            // Set the thread as current.
             kernel.RegisterHostThread(thread);
 
             // Run the callback.
@@ -1099,6 +1115,9 @@ std::jthread KernelCore::RunOnHostCoreProcess(std::string&& process_name,
     // Ensure that we don't hold onto any extra references.
     SCOPE_EXIT({ process->Close(); });
 
+    // Register the new process.
+    KProcess::Register(*this, process);
+
     // Run the host thread.
     return RunHostThreadFunc(*this, process, std::move(process_name), std::move(func));
 }
@@ -1124,6 +1143,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
     // Ensure that we don't hold onto any extra references.
     SCOPE_EXIT({ process->Close(); });
 
+    // Register the new process.
+    KProcess::Register(*this, process);
+
     // Reserve a new thread from the process resource limit.
     KScopedResourceReservation thread_reservation(process, LimitableResource::ThreadCountMax);
     ASSERT(thread_reservation.Succeeded());
@@ -1136,6 +1158,9 @@ void KernelCore::RunOnGuestCoreProcess(std::string&& process_name, std::function
     // Commit the thread reservation.
     thread_reservation.Commit();
 
+    // Register the new thread.
+    KThread::Register(*this, thread);
+
     // Begin running the thread.
     ASSERT(R_SUCCEEDED(thread->Run()));
 }
diff --git a/src/core/hle/service/ipc_helpers.h b/src/core/hle/service/ipc_helpers.h
index e4cb4e1f2..0e222362e 100644
--- a/src/core/hle/service/ipc_helpers.h
+++ b/src/core/hle/service/ipc_helpers.h
@@ -156,6 +156,7 @@ public:
 
     auto* session = Kernel::KSession::Create(kernel);
     session->Initialize(nullptr, 0);
+    Kernel::KSession::Register(kernel, session);
 
     auto next_manager = std::make_shared<Service::SessionRequestManager>(
         kernel, manager->GetServerManager());
diff --git a/src/core/hle/service/kernel_helpers.cpp b/src/core/hle/service/kernel_helpers.cpp
index a39ce5212..6a313a03b 100644
--- a/src/core/hle/service/kernel_helpers.cpp
+++ b/src/core/hle/service/kernel_helpers.cpp
@@ -25,6 +25,9 @@ ServiceContext::ServiceContext(Core::System& system_, std::string name_)
                                        Kernel::KProcess::ProcessType::KernelInternal,
                                        kernel.GetSystemResourceLimit())
                .IsSuccess());
+
+    // Register the process.
+    Kernel::KProcess::Register(kernel, process);
     process_created = true;
 }
 
diff --git a/src/core/hle/service/mutex.cpp b/src/core/hle/service/mutex.cpp
index 07589a0f0..b0ff71d1b 100644
--- a/src/core/hle/service/mutex.cpp
+++ b/src/core/hle/service/mutex.cpp
@@ -12,6 +12,9 @@ Mutex::Mutex(Core::System& system) : m_system(system) {
     m_event = Kernel::KEvent::Create(system.Kernel());
     m_event->Initialize(nullptr);
 
+    // Register the event.
+    Kernel::KEvent::Register(system.Kernel(), m_event);
+
     ASSERT(R_SUCCEEDED(m_event->Signal()));
 }
 
diff --git a/src/core/hle/service/server_manager.cpp b/src/core/hle/service/server_manager.cpp
index 6b4a1291e..156bc27d8 100644
--- a/src/core/hle/service/server_manager.cpp
+++ b/src/core/hle/service/server_manager.cpp
@@ -33,6 +33,9 @@ ServerManager::ServerManager(Core::System& system) : m_system{system}, m_serve_m
     // Initialize event.
     m_event = Kernel::KEvent::Create(system.Kernel());
     m_event->Initialize(nullptr);
+
+    // Register event.
+    Kernel::KEvent::Register(system.Kernel(), m_event);
 }
 
 ServerManager::~ServerManager() {
@@ -160,6 +163,9 @@ Result ServerManager::ManageDeferral(Kernel::KEvent** out_event) {
     // Initialize the event.
     m_deferral_event->Initialize(nullptr);
 
+    // Register the event.
+    Kernel::KEvent::Register(m_system.Kernel(), m_deferral_event);
+
     // Set the output.
     *out_event = m_deferral_event;
 
diff --git a/src/core/hle/service/sm/sm.cpp b/src/core/hle/service/sm/sm.cpp
index c45be5726..1608fa24c 100644
--- a/src/core/hle/service/sm/sm.cpp
+++ b/src/core/hle/service/sm/sm.cpp
@@ -64,6 +64,9 @@ Result ServiceManager::RegisterService(std::string name, u32 max_sessions,
     auto* port = Kernel::KPort::Create(kernel);
     port->Initialize(ServerSessionCountMax, false, 0);
 
+    // Register the port.
+    Kernel::KPort::Register(kernel, port);
+
     service_ports.emplace(name, port);
     registered_services.emplace(name, handler);
     if (deferral_event) {
diff --git a/src/core/hle/service/sm/sm_controller.cpp b/src/core/hle/service/sm/sm_controller.cpp
index 419c1df2b..7dce28fe0 100644
--- a/src/core/hle/service/sm/sm_controller.cpp
+++ b/src/core/hle/service/sm/sm_controller.cpp
@@ -49,6 +49,9 @@ void Controller::CloneCurrentObject(HLERequestContext& ctx) {
     // Commit the session reservation.
     session_reservation.Commit();
 
+    // Register the session.
+    Kernel::KSession::Register(system.Kernel(), session);
+
     // Register with server manager.
     session_manager->GetServerManager().RegisterSession(&session->GetServerSession(),
                                                         session_manager);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 432310632..a9667463f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -462,7 +462,7 @@ struct Memory::Impl {
         }
 
         if (Settings::IsFastmemEnabled()) {
-            const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
+            const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
             system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
         }
 
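
The fastmem change above loosens read protection. Assuming Settings::IsGPULevelHigh() is also true at Extreme accuracy, cached pages previously kept CPU reads only at High accuracy and above; now they keep them at every level except Extreme. A comment sketch of the new condition:

    // is_read_enable = !Settings::IsGPULevelExtreme() || !cached
    //
    //   cached == false                      -> CPU reads allowed at any accuracy
    //   cached == true, accuracy != Extreme  -> CPU reads allowed
    //   cached == true, accuracy == Extreme  -> CPU reads fault, so stale data
    //                                           cannot be read past a GPU cache
    system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);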
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index c390ac91b..3b2f6aab6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,13 +4,20 @@
 #pragma once
 
 #include <algorithm>
+#include <condition_variable>
 #include <cstring>
 #include <deque>
 #include <functional>
 #include <memory>
+#include <mutex>
+#include <thread>
 #include <queue>
 
 #include "common/common_types.h"
+#include "common/microprofile.h"
+#include "common/scope_exit.h"
+#include "common/settings.h"
+#include "common/thread.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
 #include "video_core/host1x/host1x.h"
@@ -23,15 +30,26 @@ class FenceBase {
 public:
     explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
 
+    bool IsStubbed() const {
+        return is_stubbed;
+    }
+
 protected:
     bool is_stubbed;
 };
 
-template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
+template <typename Traits>
 class FenceManager {
+    using TFence = typename Traits::FenceType;
+    using TTextureCache = typename Traits::TextureCacheType;
+    using TBufferCache = typename Traits::BufferCacheType;
+    using TQueryCache = typename Traits::QueryCacheType;
+    static constexpr bool can_async_check = Traits::HAS_ASYNC_CHECK;
+
 public:
     /// Notify the fence manager about a new frame
     void TickFrame() {
+        std::unique_lock lock(ring_guard);
         delayed_destruction_ring.Tick();
     }
 
@@ -46,17 +64,33 @@ public:
     }
 
     void SignalFence(std::function<void()>&& func) {
-        TryReleasePendingFences();
+        rasterizer.InvalidateGPUCache();
+        bool delay_fence = Settings::IsGPULevelHigh();
+        if constexpr (!can_async_check) {
+            TryReleasePendingFences<false>();
+        }
         const bool should_flush = ShouldFlush();
         CommitAsyncFlushes();
-        uncommitted_operations.emplace_back(std::move(func));
-        CommitOperations();
         TFence new_fence = CreateFence(!should_flush);
-        fences.push(new_fence);
+        if constexpr (can_async_check) {
+            guard.lock();
+        }
+        if (delay_fence) {
+            uncommitted_operations.emplace_back(std::move(func));
+        }
+        pending_operations.emplace_back(std::move(uncommitted_operations));
         QueueFence(new_fence);
+        if (!delay_fence) {
+            func();
+        }
+        fences.push(std::move(new_fence));
         if (should_flush) {
             rasterizer.FlushCommands();
         }
+        if constexpr (can_async_check) {
+            guard.unlock();
+            cv.notify_all();
+        }
     }
 
     void SignalSyncPoint(u32 value) {
@@ -66,29 +100,30 @@ public:
     }
 
     void WaitPendingFences() {
-        while (!fences.empty()) {
-            TFence& current_fence = fences.front();
-            if (ShouldWait()) {
-                WaitFence(current_fence);
-            }
-            PopAsyncFlushes();
-            auto operations = std::move(pending_operations.front());
-            pending_operations.pop_front();
-            for (auto& operation : operations) {
-                operation();
-            }
-            PopFence();
+        if constexpr (!can_async_check) {
+            TryReleasePendingFences<true>();
         }
     }
 
 protected:
     explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
-                          TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+                          TTextureCache& texture_cache_, TBufferCache& buffer_cache_,
                           TQueryCache& query_cache_)
         : rasterizer{rasterizer_}, gpu{gpu_}, syncpoint_manager{gpu.Host1x().GetSyncpointManager()},
-          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {
+        if constexpr (can_async_check) {
+            fence_thread =
+                std::jthread([this](std::stop_token token) { ReleaseThreadFunc(token); });
+        }
+    }
 
-    virtual ~FenceManager() = default;
+    virtual ~FenceManager() {
+        if constexpr (can_async_check) {
+            fence_thread.request_stop();
+            cv.notify_all();
+            fence_thread.join();
+        }
+    }
 
     /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
     /// true
@@ -104,15 +139,20 @@ protected:
     Tegra::GPU& gpu;
     Tegra::Host1x::SyncpointManager& syncpoint_manager;
     TTextureCache& texture_cache;
-    TTBufferCache& buffer_cache;
+    TBufferCache& buffer_cache;
     TQueryCache& query_cache;
 
 private:
+    template <bool force_wait>
     void TryReleasePendingFences() {
         while (!fences.empty()) {
             TFence& current_fence = fences.front();
             if (ShouldWait() && !IsFenceSignaled(current_fence)) {
-                return;
+                if constexpr (force_wait) {
+                    WaitFence(current_fence);
+                } else {
+                    return;
+                }
             }
             PopAsyncFlushes();
             auto operations = std::move(pending_operations.front());
@@ -120,7 +160,49 @@ private:
             for (auto& operation : operations) {
                 operation();
             }
-            PopFence();
+            {
+                std::unique_lock lock(ring_guard);
+                delayed_destruction_ring.Push(std::move(current_fence));
+            }
+            fences.pop();
+        }
+    }
+
+    void ReleaseThreadFunc(std::stop_token stop_token) {
+        std::string name = "GPUFencingThread";
+        MicroProfileOnThreadCreate(name.c_str());
+
+        // Cleanup
+        SCOPE_EXIT({ MicroProfileOnThreadExit(); });
+
+        Common::SetCurrentThreadName(name.c_str());
+        Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+
+        TFence current_fence;
+        std::deque<std::function<void()>> current_operations;
+        while (!stop_token.stop_requested()) {
+            {
+                std::unique_lock lock(guard);
+                cv.wait(lock, [&] { return stop_token.stop_requested() || !fences.empty(); });
+                if (stop_token.stop_requested()) [[unlikely]] {
+                    return;
+                }
+                current_fence = std::move(fences.front());
+                current_operations = std::move(pending_operations.front());
+                fences.pop();
+                pending_operations.pop_front();
+            }
+            if (!current_fence->IsStubbed()) {
+                WaitFence(current_fence);
+            }
+            PopAsyncFlushes();
+            for (auto& operation : current_operations) {
+                operation();
+            }
+            {
+                std::unique_lock lock(ring_guard);
+                delayed_destruction_ring.Push(std::move(current_fence));
+            }
         }
     }
 
@@ -154,19 +236,16 @@ private:
         query_cache.CommitAsyncFlushes();
     }
 
-    void PopFence() {
-        delayed_destruction_ring.Push(std::move(fences.front()));
-        fences.pop();
-    }
-
-    void CommitOperations() {
-        pending_operations.emplace_back(std::move(uncommitted_operations));
-    }
-
     std::queue<TFence> fences;
     std::deque<std::function<void()>> uncommitted_operations;
     std::deque<std::deque<std::function<void()>>> pending_operations;
 
+    std::mutex guard;
+    std::mutex ring_guard;
+    std::condition_variable cv;
+
+    std::jthread fence_thread;
+
     DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
 };
 
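
With HAS_ASYNC_CHECK set, released fences and their deferred operations are drained on a dedicated std::jthread instead of on the submission thread. A self-contained sketch of the same idiom with simplified types (std::function callbacks in place of fences, no rasterizer or caches); the class name and members are illustrative only:

    #include <condition_variable>
    #include <deque>
    #include <functional>
    #include <mutex>
    #include <thread>

    class AsyncReleaser {
    public:
        AsyncReleaser() : worker{[this](std::stop_token token) { Run(token); }} {}

        ~AsyncReleaser() {
            worker.request_stop();
            cv.notify_all();
            // worker joins automatically when the std::jthread is destroyed.
        }

        void Push(std::function<void()> operation) {
            {
                std::scoped_lock lock{guard};
                pending.push_back(std::move(operation));
            }
            cv.notify_all();
        }

    private:
        void Run(std::stop_token token) {
            while (!token.stop_requested()) {
                std::function<void()> operation;
                {
                    std::unique_lock lock{guard};
                    cv.wait(lock,
                            [&] { return token.stop_requested() || !pending.empty(); });
                    if (token.stop_requested()) {
                        return;
                    }
                    operation = std::move(pending.front());
                    pending.pop_front();
                }
                operation(); // Stands in for WaitFence + running deferred operations.
            }
        }

        std::mutex guard;
        std::condition_variable cv;
        std::deque<std::function<void()>> pending;
        std::jthread worker; // Declared last so it starts after the members above.
    };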
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 01fb5b546..e06ce5d14 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -170,6 +170,7 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
 
 GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
                             bool is_big_pages) {
+    std::unique_lock<std::mutex> lock(guard);
     if (is_big_pages) [[likely]] {
         return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
     }
@@ -177,6 +178,7 @@ GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
 }
 
 GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+    std::unique_lock<std::mutex> lock(guard);
     if (is_big_pages) [[likely]] {
         return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
     }
@@ -187,6 +189,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
     }
+    std::unique_lock<std::mutex> lock(guard);
     GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
 
     for (const auto& [map_addr, map_size] : page_stash) {
@@ -553,6 +556,7 @@ size_t MemoryManager::MaxContinuousRange(GPUVAddr gpu_addr, size_t size) const {
 }
 
 size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
+    std::unique_lock<std::mutex> lock(guard);
     return kind_map.GetContinuousSizeFrom(gpu_addr);
 }
 
558 562
@@ -745,10 +749,10 @@ void MemoryManager::FlushCaching() {
         return;
     }
     accumulator->Callback([this](GPUVAddr addr, size_t size) {
-        GetSubmappedRangeImpl<false>(addr, size, page_stash);
+        GetSubmappedRangeImpl<false>(addr, size, page_stash2);
     });
-    rasterizer->InnerInvalidation(page_stash);
-    page_stash.clear();
+    rasterizer->InnerInvalidation(page_stash2);
+    page_stash2.clear();
     accumulator->Clear();
 }
 
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fbbe856c4..794535122 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -5,6 +5,7 @@
 
 #include <atomic>
 #include <map>
+#include <mutex>
 #include <optional>
 #include <vector>
 
@@ -215,6 +216,9 @@ private:
 
     std::vector<u64> big_page_continuous;
     std::vector<std::pair<VAddr, std::size_t>> page_stash{};
+    std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
+
+    mutable std::mutex guard;
 
     static constexpr size_t continuous_bits = 64;
 
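
Both memory_manager changes work together: the new guard mutex serializes Map/MapSparse/Unmap against GetMemoryLayoutSize (which is const, hence the mutable std::mutex), and page_stash2 gives FlushCaching its own scratch vector so it cannot clobber page_stash while Unmap is using it. A minimal sketch of the mutable-mutex pattern, with illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <map>
    #include <mutex>

    class PageTable {
    public:
        void Map(std::uint64_t gpu_addr, std::uint64_t cpu_addr) {
            std::unique_lock<std::mutex> lock(guard);
            entries[gpu_addr] = cpu_addr;
        }

        std::size_t Count() const {
            // Legal in a const member function because guard is mutable.
            std::unique_lock<std::mutex> lock(guard);
            return entries.size();
        }

    private:
        std::map<std::uint64_t, std::uint64_t> entries;
        mutable std::mutex guard;
    };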
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 8906ba6d8..941de95c1 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -6,6 +6,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <functional>
 #include <iterator>
 #include <list>
 #include <memory>
@@ -17,13 +18,19 @@
 
 #include "common/assert.h"
 #include "common/settings.h"
+#include "core/memory.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
 
 namespace VideoCommon {
 
+using AsyncJobId = SlotId;
+
+static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
+
 template <class QueryCache, class HostCounter>
 class CounterStreamBase {
 public:
@@ -93,9 +100,13 @@ private:
 template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
 class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
-    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
-        : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
-                                                          VideoCore::QueryType::SamplesPassed}}} {}
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+                            Core::Memory::Memory& cpu_memory_)
+        : rasterizer{rasterizer_},
+          cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+                                                          VideoCore::QueryType::SamplesPassed}}} {
+        (void)slot_async_jobs.insert(); // Null value
+    }
 
     void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@ public:
             query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
         }
 
-        query->BindCounter(Stream(type).Current(), timestamp);
-        if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-            AsyncFlushQuery(*cpu_addr);
+        auto result = query->BindCounter(Stream(type).Current(), timestamp);
+        if (result) {
+            auto async_job_id = query->GetAsyncJob();
+            auto& async_job = slot_async_jobs[async_job_id];
+            async_job.collected = true;
+            async_job.value = *result;
+            query->SetAsyncJob(NULL_ASYNC_JOB_ID);
         }
+        AsyncFlushQuery(query, timestamp, lock);
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@ public:
     }
 
     void CommitAsyncFlushes() {
+        std::unique_lock lock{mutex};
         committed_flushes.push_back(uncommitted_flushes);
         uncommitted_flushes.reset();
     }
 
     bool HasUncommittedFlushes() const {
+        std::unique_lock lock{mutex};
         return uncommitted_flushes != nullptr;
     }
 
     bool ShouldWaitAsyncFlushes() const {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return false;
         }
@@ -189,6 +208,7 @@ public:
     }
 
     void PopAsyncFlushes() {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return;
         }
@@ -197,15 +217,25 @@ public:
             committed_flushes.pop_front();
             return;
         }
-        for (VAddr query_address : *flush_list) {
-            FlushAndRemoveRegion(query_address, 4);
+        for (AsyncJobId async_job_id : *flush_list) {
+            AsyncJob& async_job = slot_async_jobs[async_job_id];
+            if (!async_job.collected) {
+                FlushAndRemoveRegion(async_job.query_location, 2, true);
+            }
         }
         committed_flushes.pop_front();
     }
 
 private:
+    struct AsyncJob {
+        bool collected = false;
+        u64 value = 0;
+        VAddr query_location = 0;
+        std::optional<u64> timestamp{};
+    };
+
     /// Flushes a memory range to guest memory and removes it from the cache.
-    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
+    void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
         const u64 addr_begin = addr;
         const u64 addr_end = addr_begin + size;
         const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@ private:
                 continue;
             }
             rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
-            query.Flush();
+            AsyncJobId async_job_id = query.GetAsyncJob();
+            auto flush_result = query.Flush(async);
+            if (async_job_id == NULL_ASYNC_JOB_ID) {
+                ASSERT_MSG(false, "This should not be reachable at all");
+                continue;
+            }
+            AsyncJob& async_job = slot_async_jobs[async_job_id];
+            async_job.collected = true;
+            async_job.value = flush_result;
+            query.SetAsyncJob(NULL_ASYNC_JOB_ID);
         }
         std::erase_if(contents, in_range);
     }
@@ -253,26 +292,60 @@ private:
         return found != std::end(contents) ? &*found : nullptr;
     }
 
-    void AsyncFlushQuery(VAddr addr) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
+    void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
+                         std::unique_lock<std::recursive_mutex>& lock) {
+        const AsyncJobId new_async_job_id = slot_async_jobs.insert();
+        {
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            query->SetAsyncJob(new_async_job_id);
+            async_job.query_location = query->GetCpuAddr();
+            async_job.collected = false;
+
+            if (!uncommitted_flushes) {
+                uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
+            }
+            uncommitted_flushes->push_back(new_async_job_id);
         }
-        uncommitted_flushes->push_back(addr);
+        lock.unlock();
+        std::function<void()> operation([this, new_async_job_id, timestamp] {
+            std::unique_lock local_lock{mutex};
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            u64 value = async_job.value;
+            VAddr address = async_job.query_location;
+            slot_async_jobs.erase(new_async_job_id);
+            local_lock.unlock();
+            if (timestamp) {
+                u64 timestamp_value = *timestamp;
+                cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
+                cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
+                rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
+                                            VideoCommon::CacheType::NoQueryCache);
+            } else {
+                u32 small_value = static_cast<u32>(value);
+                cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
+                rasterizer.InvalidateRegion(address, sizeof(u32),
+                                            VideoCommon::CacheType::NoQueryCache);
+            }
+        });
+        rasterizer.SyncOperation(std::move(operation));
     }
 
     static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
     static constexpr unsigned YUZU_PAGEBITS = 12;
 
+    SlotVector<AsyncJob> slot_async_jobs;
+
     VideoCore::RasterizerInterface& rasterizer;
+    Core::Memory::Memory& cpu_memory;
 
-    std::recursive_mutex mutex;
+    mutable std::recursive_mutex mutex;
 
     std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 
-    std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
-    std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
+    std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
 };
 
 template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@ public:
     virtual ~HostCounterBase() = default;
 
     /// Returns the current value of the query.
-    u64 Query() {
+    u64 Query(bool async = false) {
         if (result) {
             return *result;
         }
 
-        u64 value = BlockingQuery() + base_result;
+        u64 value = BlockingQuery(async) + base_result;
         if (dependency) {
             value += dependency->Query();
             dependency = nullptr;
@@ -317,7 +390,7 @@ public:
 
 protected:
     /// Returns the value of query from the backend API blocking as needed.
-    virtual u64 BlockingQuery() const = 0;
+    virtual u64 BlockingQuery(bool async = false) const = 0;
 
 private:
     std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@ public:
     CachedQueryBase& operator=(const CachedQueryBase&) = delete;
 
     /// Flushes the query to guest memory.
-    virtual void Flush() {
+    virtual u64 Flush(bool async = false) {
         // When counter is nullptr it means that it's just been reset. We are supposed to write a
         // zero in these cases.
-        const u64 value = counter ? counter->Query() : 0;
+        const u64 value = counter ? counter->Query(async) : 0;
+        if (async) {
+            return value;
+        }
         std::memcpy(host_ptr, &value, sizeof(u64));
 
         if (timestamp) {
             std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
         }
+        return value;
     }
 
     /// Binds a counter to this query.
-    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+    std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
+                                   std::optional<u64> timestamp_) {
+        std::optional<u64> result{};
         if (counter) {
             // If there's an old counter set it means the query is being rewritten by the game.
             // To avoid losing the data forever, flush here.
-            Flush();
+            result = std::make_optional(Flush());
         }
         counter = std::move(counter_);
         timestamp = timestamp_;
+        return result;
     }
 
     VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@ public:
         return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
     }
 
+    void SetAsyncJob(AsyncJobId assigned_async_job_) {
+        assigned_async_job = assigned_async_job_;
+    }
+
+    AsyncJobId GetAsyncJob() const {
+        return assigned_async_job;
+    }
+
 protected:
     /// Returns true when querying the counter may potentially block.
     bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@ private:
     u8* host_ptr;                         ///< Writable host pointer.
     std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
     std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+    AsyncJobId assigned_async_job;
 };
 
 } // namespace VideoCommon
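
A sketch of the AsyncJob lifecycle introduced above, with a plain std::vector standing in for yuzu's SlotVector (slot 0 plays the role of NULL_ASYNC_JOB_ID); addresses and values are illustrative:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <vector>

    struct AsyncJob {
        bool collected = false;           // Result already captured?
        std::uint64_t value = 0;          // Counter result to write back.
        std::uint64_t query_location = 0; // Guest address of the query report.
        std::optional<std::uint64_t> timestamp{};
    };

    int main() {
        std::vector<AsyncJob> jobs(1); // Slot 0 is the reserved null job.

        // 1. BindCounter: allocate a job recording where the query writes.
        jobs.push_back({.collected = false, .value = 0, .query_location = 0x1000});
        const std::size_t id = jobs.size() - 1;

        // 2. FlushAndRemoveRegion (or a rewrite in BindCounter) collects the
        //    value eagerly and marks the job done.
        jobs[id].collected = true;
        jobs[id].value = 42;

        // 3. The deferred SyncOperation reads the job and writes the value
        //    (and optional timestamp) back to guest memory; sketched here as
        //    a check instead of cpu_memory.WriteBlockUnsafe.
        return (jobs[id].collected && jobs[id].value == 42) ? 0 : 1;
    }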
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index f1446e732..e21b19dcc 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -30,7 +30,17 @@ private:
 };
 
 using Fence = std::shared_ptr<GLInnerFence>;
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+
+struct FenceManagerParams {
+    using FenceType = Fence;
+    using BufferCacheType = BufferCache;
+    using TextureCacheType = TextureCache;
+    using QueryCacheType = QueryCache;
+
+    static constexpr bool HAS_ASYNC_CHECK = false;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
 
 class FenceManagerOpenGL final : public GenericFenceManager {
 public:
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 5070db441..99d7347f5 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,8 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
 
 } // Anonymous namespace
 
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
-    : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_)
+    : QueryCacheBase(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {}
 
 QueryCache::~QueryCache() = default;
 
@@ -74,7 +74,7 @@ void HostCounter::EndQuery() {
     glEndQuery(GetTarget(type));
 }
 
-u64 HostCounter::BlockingQuery() const {
+u64 HostCounter::BlockingQuery([[maybe_unused]] bool async) const {
     GLint64 value;
     glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
     return static_cast<u64>(value);
@@ -96,7 +96,7 @@ CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
     return *this;
 }
 
-void CachedQuery::Flush() {
+u64 CachedQuery::Flush([[maybe_unused]] bool async) {
     // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
     // To avoid this disable and re-enable keeping the dependency stream.
     // But we only have to do this if we have pending waits to be done.
@@ -106,11 +106,13 @@ void CachedQuery::Flush() {
         stream.Update(false);
     }
 
-    VideoCommon::CachedQueryBase<HostCounter>::Flush();
+    auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush();
 
     if (slice_counter) {
         stream.Update(true);
     }
+
+    return result;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 14ce59990..872513f22 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,7 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(RasterizerOpenGL& rasterizer_);
+    explicit QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_);
     ~QueryCache();
 
     OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -51,7 +51,7 @@ public:
     void EndQuery();
 
 private:
-    u64 BlockingQuery() const override;
+    u64 BlockingQuery(bool async = false) const override;
 
     QueryCache& cache;
     const VideoCore::QueryType type;
@@ -70,7 +70,7 @@ public:
     CachedQuery(const CachedQuery&) = delete;
     CachedQuery& operator=(const CachedQuery&) = delete;
 
-    void Flush() override;
+    u64 Flush(bool async = false) override;
 
 private:
     QueryCache* cache;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4993d4709..0089b4b27 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -63,7 +63,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
       shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
                    state_tracker, gpu.ShaderNotify()),
-      query_cache(*this), accelerate_dma(buffer_cache, texture_cache),
+      query_cache(*this, cpu_memory_), accelerate_dma(buffer_cache, texture_cache),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
       blit_image(program_manager_) {}
 
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index 0214b103a..fad9e3832 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -5,6 +5,7 @@
 
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_fence_manager.h"
+#include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/vulkan_common/vulkan_device.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7fe2afcd9..145359d4e 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -40,7 +40,16 @@ private:
 };
 using Fence = std::shared_ptr<InnerFence>;
 
-using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>;
+struct FenceManagerParams {
+    using FenceType = Fence;
+    using BufferCacheType = BufferCache;
+    using TextureCacheType = TextureCache;
+    using QueryCacheType = QueryCache;
+
+    static constexpr bool HAS_ASYNC_CHECK = true;
+};
+
+using GenericFenceManager = VideoCommon::FenceManager<FenceManagerParams>;
 
 class FenceManager final : public GenericFenceManager {
 public:
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 929c8ece6..d67490449 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -66,9 +66,10 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
     }
 }
 
-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+                       Core::Memory::Memory& cpu_memory_, const Device& device_,
                        Scheduler& scheduler_)
-    : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
+    : QueryCacheBase{rasterizer_, cpu_memory_}, device{device_}, scheduler{scheduler_},
       query_pools{
           QueryPool{device_, scheduler_, QueryType::SamplesPassed},
       } {}
@@ -98,8 +99,10 @@ HostCounter::HostCounter(QueryCache& cache_, std::shared_ptr<HostCounter> depend
       query{cache_.AllocateQuery(type_)}, tick{cache_.GetScheduler().CurrentTick()} {
     const vk::Device* logical = &cache.GetDevice().GetLogical();
     cache.GetScheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
+        const bool use_precise = Settings::IsGPULevelHigh();
         logical->ResetQueryPool(query.first, query.second, 1);
-        cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
+        cmdbuf.BeginQuery(query.first, query.second,
+                          use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
     });
 }
 
@@ -112,8 +115,10 @@ void HostCounter::EndQuery() {
         [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
 }
 
-u64 HostCounter::BlockingQuery() const {
-    cache.GetScheduler().Wait(tick);
+u64 HostCounter::BlockingQuery(bool async) const {
+    if (!async) {
+        cache.GetScheduler().Wait(tick);
+    }
     u64 data;
     const VkResult query_result = cache.GetDevice().GetLogical().GetQueryResults(
         query.first, query.second, 1, sizeof(data), &data, sizeof(data),
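
Two behavioral notes on the Vulkan counter above: BeginQuery only requests precise results at high GPU accuracy (per the Vulkan spec, an imprecise occlusion query may return any nonzero value when samples pass), and BlockingQuery(async = true) skips Wait(tick) because the fencing thread has already waited on the fence before collecting. A hedged sketch of the raw-Vulkan equivalent of the readback, where device, pool, and first_query are illustrative handles:

    // Assumed raw-Vulkan counterpart of the readback in BlockingQuery().
    std::uint64_t data{};
    const VkResult result = vkGetQueryPoolResults(
        device, pool, first_query, /*queryCount=*/1, sizeof(data), &data,
        /*stride=*/sizeof(data), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);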
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 26762ee09..c1b9552eb 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,7 +52,8 @@ private:
 class QueryCache final
     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
-                        Scheduler& scheduler_);
+    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
+                        Core::Memory::Memory& cpu_memory_, const Device& device_,
+                        Scheduler& scheduler_);
     ~QueryCache();
 
@@ -83,7 +84,7 @@ public:
     void EndQuery();
 
 private:
-    u64 BlockingQuery() const override;
+    u64 BlockingQuery(bool async = false) const override;
 
     QueryCache& cache;
     const VideoCore::QueryType type;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2559a3aa7..d1489fc95 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -172,7 +172,8 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
       buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
       pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
-      query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler),
+      query_cache{*this, cpu_memory_, device, scheduler},
+      accelerate_dma(buffer_cache, texture_cache, scheduler),
       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
       wfi_event(device.GetLogical().CreateEvent()) {
     scheduler.SetQueryCache(query_cache);
@@ -675,7 +676,8 @@ bool RasterizerVulkan::AccelerateConditionalRendering() {
     const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
     Maxwell::ReportSemaphore::Compare cmp;
     if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
-                                  VideoCommon::CacheType::BufferCache)) {
+                                  VideoCommon::CacheType::BufferCache |
+                                      VideoCommon::CacheType::QueryCache)) {
         return true;
     }
     return false;