Diffstat (limited to 'src/video_core/query_cache.h')
-rw-r--r-- | src/video_core/query_cache.h | 137
1 file changed, 113 insertions, 24 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 8906ba6d8..941de95c1 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -6,6 +6,7 @@
 #include <algorithm>
 #include <array>
 #include <cstring>
+#include <functional>
 #include <iterator>
 #include <list>
 #include <memory>
@@ -17,13 +18,19 @@
 
 #include "common/assert.h"
 #include "common/settings.h"
+#include "core/memory.h"
 #include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
+#include "video_core/texture_cache/slot_vector.h"
 
 namespace VideoCommon {
 
+using AsyncJobId = SlotId;
+
+static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0};
+
 template <class QueryCache, class HostCounter>
 class CounterStreamBase {
 public:
@@ -93,9 +100,13 @@ private:
 template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
 class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
-    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
-        : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
-                                                          VideoCore::QueryType::SamplesPassed}}} {}
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+                            Core::Memory::Memory& cpu_memory_)
+        : rasterizer{rasterizer_},
+          cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+                                                          VideoCore::QueryType::SamplesPassed}}} {
+        (void)slot_async_jobs.insert(); // Null value
+    }
 
     void InvalidateRegion(VAddr addr, std::size_t size) {
         std::unique_lock lock{mutex};
@@ -126,10 +137,15 @@ public:
             query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
         }
 
-        query->BindCounter(Stream(type).Current(), timestamp);
-        if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
-            AsyncFlushQuery(*cpu_addr);
+        auto result = query->BindCounter(Stream(type).Current(), timestamp);
+        if (result) {
+            auto async_job_id = query->GetAsyncJob();
+            auto& async_job = slot_async_jobs[async_job_id];
+            async_job.collected = true;
+            async_job.value = *result;
+            query->SetAsyncJob(NULL_ASYNC_JOB_ID);
         }
+        AsyncFlushQuery(query, timestamp, lock);
     }
 
     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -173,15 +189,18 @@ public:
     }
 
     void CommitAsyncFlushes() {
+        std::unique_lock lock{mutex};
         committed_flushes.push_back(uncommitted_flushes);
         uncommitted_flushes.reset();
     }
 
     bool HasUncommittedFlushes() const {
+        std::unique_lock lock{mutex};
         return uncommitted_flushes != nullptr;
     }
 
     bool ShouldWaitAsyncFlushes() const {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return false;
         }
@@ -189,6 +208,7 @@ public:
     }
 
     void PopAsyncFlushes() {
+        std::unique_lock lock{mutex};
         if (committed_flushes.empty()) {
             return;
         }
@@ -197,15 +217,25 @@ public:
             committed_flushes.pop_front();
             return;
         }
-        for (VAddr query_address : *flush_list) {
-            FlushAndRemoveRegion(query_address, 4);
+        for (AsyncJobId async_job_id : *flush_list) {
+            AsyncJob& async_job = slot_async_jobs[async_job_id];
+            if (!async_job.collected) {
+                FlushAndRemoveRegion(async_job.query_location, 2, true);
+            }
         }
         committed_flushes.pop_front();
     }
 
 private:
+    struct AsyncJob {
+        bool collected = false;
+        u64 value = 0;
+        VAddr query_location = 0;
+        std::optional<u64> timestamp{};
+    };
+
     /// Flushes a memory range to guest memory and removes it from the cache.
-    void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
+    void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) {
         const u64 addr_begin = addr;
         const u64 addr_end = addr_begin + size;
         const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
@@ -226,7 +256,16 @@ private:
                     continue;
                 }
                 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
-                query.Flush();
+                AsyncJobId async_job_id = query.GetAsyncJob();
+                auto flush_result = query.Flush(async);
+                if (async_job_id == NULL_ASYNC_JOB_ID) {
+                    ASSERT_MSG(false, "This should not be reachable at all");
+                    continue;
+                }
+                AsyncJob& async_job = slot_async_jobs[async_job_id];
+                async_job.collected = true;
+                async_job.value = flush_result;
+                query.SetAsyncJob(NULL_ASYNC_JOB_ID);
             }
             std::erase_if(contents, in_range);
         }
@@ -253,26 +292,60 @@ private:
         return found != std::end(contents) ? &*found : nullptr;
     }
 
-    void AsyncFlushQuery(VAddr addr) {
-        if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
+    void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp,
+                         std::unique_lock<std::recursive_mutex>& lock) {
+        const AsyncJobId new_async_job_id = slot_async_jobs.insert();
+        {
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            query->SetAsyncJob(new_async_job_id);
+            async_job.query_location = query->GetCpuAddr();
+            async_job.collected = false;
+
+            if (!uncommitted_flushes) {
+                uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>();
+            }
+            uncommitted_flushes->push_back(new_async_job_id);
         }
-        uncommitted_flushes->push_back(addr);
+        lock.unlock();
+        std::function<void()> operation([this, new_async_job_id, timestamp] {
+            std::unique_lock local_lock{mutex};
+            AsyncJob& async_job = slot_async_jobs[new_async_job_id];
+            u64 value = async_job.value;
+            VAddr address = async_job.query_location;
+            slot_async_jobs.erase(new_async_job_id);
+            local_lock.unlock();
+            if (timestamp) {
+                u64 timestamp_value = *timestamp;
+                cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64));
+                cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64));
+                rasterizer.InvalidateRegion(address, sizeof(u64) * 2,
+                                            VideoCommon::CacheType::NoQueryCache);
+            } else {
+                u32 small_value = static_cast<u32>(value);
+                cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32));
+                rasterizer.InvalidateRegion(address, sizeof(u32),
+                                            VideoCommon::CacheType::NoQueryCache);
+            }
+        });
+        rasterizer.SyncOperation(std::move(operation));
     }
 
     static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
     static constexpr unsigned YUZU_PAGEBITS = 12;
 
+    SlotVector<AsyncJob> slot_async_jobs;
+
     VideoCore::RasterizerInterface& rasterizer;
+    Core::Memory::Memory& cpu_memory;
 
-    std::recursive_mutex mutex;
+    mutable std::recursive_mutex mutex;
 
     std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
 
     std::array<CounterStream, VideoCore::NumQueryTypes> streams;
 
-    std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
-    std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
+    std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{};
+    std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes;
 };
 
 template <class QueryCache, class HostCounter>
@@ -291,12 +364,12 @@ public:
     virtual ~HostCounterBase() = default;
 
     /// Returns the current value of the query.
-    u64 Query() {
+    u64 Query(bool async = false) {
         if (result) {
             return *result;
         }
 
-        u64 value = BlockingQuery() + base_result;
+        u64 value = BlockingQuery(async) + base_result;
         if (dependency) {
             value += dependency->Query();
             dependency = nullptr;
@@ -317,7 +390,7 @@ public:
 
 protected:
     /// Returns the value of query from the backend API blocking as needed.
-    virtual u64 BlockingQuery() const = 0;
+    virtual u64 BlockingQuery(bool async = false) const = 0;
 
 private:
     std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
@@ -340,26 +413,33 @@ public:
     CachedQueryBase& operator=(const CachedQueryBase&) = delete;
 
     /// Flushes the query to guest memory.
-    virtual void Flush() {
+    virtual u64 Flush(bool async = false) {
         // When counter is nullptr it means that it's just been reset. We are supposed to write a
         // zero in these cases.
-        const u64 value = counter ? counter->Query() : 0;
+        const u64 value = counter ? counter->Query(async) : 0;
+        if (async) {
+            return value;
+        }
         std::memcpy(host_ptr, &value, sizeof(u64));
 
         if (timestamp) {
             std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
         }
+        return value;
     }
 
     /// Binds a counter to this query.
-    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
+    std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_,
+                                   std::optional<u64> timestamp_) {
+        std::optional<u64> result{};
         if (counter) {
             // If there's an old counter set it means the query is being rewritten by the game.
             // To avoid losing the data forever, flush here.
-            Flush();
+            result = std::make_optional(Flush());
         }
         counter = std::move(counter_);
         timestamp = timestamp_;
+        return result;
     }
 
     VAddr GetCpuAddr() const noexcept {
@@ -374,6 +454,14 @@ public:
         return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
     }
 
+    void SetAsyncJob(AsyncJobId assigned_async_job_) {
+        assigned_async_job = assigned_async_job_;
+    }
+
+    AsyncJobId GetAsyncJob() const {
+        return assigned_async_job;
+    }
+
 protected:
     /// Returns true when querying the counter may potentially block.
     bool WaitPending() const noexcept {
@@ -389,6 +477,7 @@ private:
     u8* host_ptr;                         ///< Writable host pointer.
     std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
     std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
+    AsyncJobId assigned_async_job;
 };
 
 } // namespace VideoCommon
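
For context, the change above stops writing query results to guest memory directly from the flush path. Each query is tied to an AsyncJob slot; FlushAndRemoveRegion (and the rewrite path in BindCounter) only records the counter value into that slot, and AsyncFlushQuery hands the rasterizer a deferred operation that later writes the value, plus the optional timestamp, back with cpu_memory.WriteBlockUnsafe and invalidates the region. The following is a minimal standalone sketch of that deferred-write flow, not yuzu code: a plain std::unordered_map stands in for SlotVector<AsyncJob>, a byte buffer for Core::Memory::Memory, and a local std::queue for rasterizer.SyncOperation.

// Minimal standalone sketch (not yuzu code) of the deferred-write pattern used by the
// new AsyncFlushQuery. GuestMemory, the jobs map and sync_queue are invented stand-ins
// for Core::Memory::Memory, SlotVector<AsyncJob> and rasterizer.SyncOperation.
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <optional>
#include <queue>
#include <unordered_map>
#include <vector>

using u32 = std::uint32_t;
using u64 = std::uint64_t;
using VAddr = u64;
using AsyncJobId = u64;

struct AsyncJob {
    bool collected = false;   // set once the host counter value has been read back
    u64 value = 0;            // counter value captured at flush time
    VAddr query_location = 0; // guest address the query result must be written to
};

struct GuestMemory { // stand-in for Core::Memory::Memory
    std::vector<std::uint8_t> data = std::vector<std::uint8_t>(0x1000);
    void Write(VAddr addr, const void* src, std::size_t size) {
        std::memcpy(data.data() + addr, src, size);
    }
};

int main() {
    GuestMemory cpu_memory;
    std::unordered_map<AsyncJobId, AsyncJob> jobs; // stand-in for slot_async_jobs
    std::queue<std::function<void()>> sync_queue;  // stand-in for rasterizer.SyncOperation
    AsyncJobId next_id = 1;                        // 0 plays the role of NULL_ASYNC_JOB_ID

    // 1. Query time: allocate a job, remember where the result belongs, and queue a
    //    deferred operation that will perform the actual guest-memory write later.
    const AsyncJobId job_id = next_id++;
    jobs[job_id] = AsyncJob{.collected = false, .value = 0, .query_location = 0x40};
    const std::optional<u64> timestamp = 1234; // present only for timestamped queries

    sync_queue.push([&, job_id, timestamp] {
        const AsyncJob job = jobs[job_id];
        jobs.erase(job_id);
        if (timestamp) { // large query: 8-byte value followed by 8-byte timestamp
            cpu_memory.Write(job.query_location + sizeof(u64), &*timestamp, sizeof(u64));
            cpu_memory.Write(job.query_location, &job.value, sizeof(u64));
        } else { // small query: 4-byte value only
            const u32 small_value = static_cast<u32>(job.value);
            cpu_memory.Write(job.query_location, &small_value, sizeof(u32));
        }
    });

    // 2. Flush time (FlushAndRemoveRegion / the BindCounter rewrite path): read the host
    //    counter once and stash the result in the job instead of touching guest memory.
    jobs[job_id].value = 42;
    jobs[job_id].collected = true;

    // 3. Later, the queued operation runs and writes the collected value to guest memory.
    while (!sync_queue.empty()) {
        sync_queue.front()();
        sync_queue.pop();
    }

    u64 stored = 0;
    std::memcpy(&stored, cpu_memory.data.data() + 0x40, sizeof(u64));
    std::cout << "query result written to guest memory: " << stored << '\n'; // prints 42
    return 0;
}

The point of the split is ordering: the counter is read back during the flush, but the guest-visible write only happens when the rasterizer later runs the queued operation.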