diff options
Diffstat (limited to 'src/video_core/query_cache.h')
| -rw-r--r-- | src/video_core/query_cache.h | 122 |
1 files changed, 94 insertions, 28 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index cd339b99d..2a14cc36a 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -17,13 +17,19 @@ | |||
| 17 | 17 | ||
| 18 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 19 | #include "common/settings.h" | 19 | #include "common/settings.h" |
| 20 | #include "core/memory.h" | ||
| 20 | #include "video_core/control/channel_state_cache.h" | 21 | #include "video_core/control/channel_state_cache.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/memory_manager.h" | 23 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/rasterizer_interface.h" | 24 | #include "video_core/rasterizer_interface.h" |
| 25 | #include "video_core/texture_cache/slot_vector.h" | ||
| 24 | 26 | ||
| 25 | namespace VideoCommon { | 27 | namespace VideoCommon { |
| 26 | 28 | ||
| 29 | using AsyncJobId = SlotId; | ||
| 30 | |||
| 31 | static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0}; | ||
| 32 | |||
| 27 | template <class QueryCache, class HostCounter> | 33 | template <class QueryCache, class HostCounter> |
| 28 | class CounterStreamBase { | 34 | class CounterStreamBase { |
| 29 | public: | 35 | public: |
| @@ -93,9 +99,13 @@ private: | |||
| 93 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | 99 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> |
| 94 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 100 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 95 | public: | 101 | public: |
| 96 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_) | 102 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, |
| 97 | : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | 103 | Core::Memory::Memory& cpu_memory_) |
| 98 | VideoCore::QueryType::SamplesPassed}}} {} | 104 | : rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, streams{ |
| 105 | {CounterStream{static_cast<QueryCache&>(*this), | ||
| 106 | VideoCore::QueryType::SamplesPassed}}} { | ||
| 107 | (void) slot_async_jobs.insert(); // Null value | ||
| 108 | } | ||
| 99 | 109 | ||
| 100 | void InvalidateRegion(VAddr addr, std::size_t size) { | 110 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 101 | std::unique_lock lock{mutex}; | 111 | std::unique_lock lock{mutex}; |
| @@ -126,10 +136,15 @@ public: | |||
| 126 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 136 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); |
| 127 | } | 137 | } |
| 128 | 138 | ||
| 129 | query->BindCounter(Stream(type).Current(), timestamp); | 139 | auto result = query->BindCounter(Stream(type).Current()); |
| 130 | if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 140 | if (result) { |
| 131 | AsyncFlushQuery(*cpu_addr); | 141 | auto async_job_id = query->GetAsyncJob(); |
| 142 | auto& async_job = slot_async_jobs[async_job_id]; | ||
| 143 | async_job.collected = true; | ||
| 144 | async_job.value = *result; | ||
| 145 | query->SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 132 | } | 146 | } |
| 147 | AsyncFlushQuery(query, timestamp, lock); | ||
| 133 | } | 148 | } |
| 134 | 149 | ||
| 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 150 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| @@ -201,15 +216,25 @@ public: | |||
| 201 | committed_flushes.pop_front(); | 216 | committed_flushes.pop_front(); |
| 202 | return; | 217 | return; |
| 203 | } | 218 | } |
| 204 | for (VAddr query_address : *flush_list) { | 219 | for (AsyncJobId async_job_id : *flush_list) { |
| 205 | FlushAndRemoveRegion(query_address, 4); | 220 | AsyncJob& async_job = slot_async_jobs[async_job_id]; |
| 221 | if (!async_job.collected) { | ||
| 222 | FlushAndRemoveRegion(async_job.query_location, 2, true); | ||
| 223 | } | ||
| 206 | } | 224 | } |
| 207 | committed_flushes.pop_front(); | 225 | committed_flushes.pop_front(); |
| 208 | } | 226 | } |
| 209 | 227 | ||
| 210 | private: | 228 | private: |
| 229 | struct AsyncJob { | ||
| 230 | bool collected = false; | ||
| 231 | u64 value = 0; | ||
| 232 | VAddr query_location = 0; | ||
| 233 | std::optional<u64> timestamp{}; | ||
| 234 | }; | ||
| 235 | |||
| 211 | /// Flushes a memory range to guest memory and removes it from the cache. | 236 | /// Flushes a memory range to guest memory and removes it from the cache. |
| 212 | void FlushAndRemoveRegion(VAddr addr, std::size_t size) { | 237 | void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) { |
| 213 | const u64 addr_begin = addr; | 238 | const u64 addr_begin = addr; |
| 214 | const u64 addr_end = addr_begin + size; | 239 | const u64 addr_end = addr_begin + size; |
| 215 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { | 240 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { |
| @@ -230,7 +255,16 @@ private: | |||
| 230 | continue; | 255 | continue; |
| 231 | } | 256 | } |
| 232 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); | 257 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); |
| 233 | query.Flush(); | 258 | AsyncJobId async_job_id = query.GetAsyncJob(); |
| 259 | auto flush_result = query.Flush(async); | ||
| 260 | if (async_job_id == NULL_ASYNC_JOB_ID) { | ||
| 261 | ASSERT_MSG(false, "This should not be reachable at all"); | ||
| 262 | continue; | ||
| 263 | } | ||
| 264 | AsyncJob& async_job = slot_async_jobs[async_job_id]; | ||
| 265 | async_job.collected = true; | ||
| 266 | async_job.value = flush_result; | ||
| 267 | query.SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 234 | } | 268 | } |
| 235 | std::erase_if(contents, in_range); | 269 | std::erase_if(contents, in_range); |
| 236 | } | 270 | } |
| @@ -257,17 +291,43 @@ private: | |||
| 257 | return found != std::end(contents) ? &*found : nullptr; | 291 | return found != std::end(contents) ? &*found : nullptr; |
| 258 | } | 292 | } |
| 259 | 293 | ||
| 260 | void AsyncFlushQuery(VAddr addr) { | 294 | void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp, |
| 295 | std::unique_lock<std::recursive_mutex>& lock) { | ||
| 296 | const AsyncJobId new_async_job_id = slot_async_jobs.insert(); | ||
| 297 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 298 | query->SetAsyncJob(new_async_job_id); | ||
| 299 | async_job.query_location = query->GetCpuAddr(); | ||
| 300 | async_job.collected = false; | ||
| 301 | |||
| 261 | if (!uncommitted_flushes) { | 302 | if (!uncommitted_flushes) { |
| 262 | uncommitted_flushes = std::make_shared<std::vector<VAddr>>(); | 303 | uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>(); |
| 263 | } | 304 | } |
| 264 | uncommitted_flushes->push_back(addr); | 305 | uncommitted_flushes->push_back(new_async_job_id); |
| 306 | lock.unlock(); | ||
| 307 | std::function<void()> operation([this, new_async_job_id, timestamp] { | ||
| 308 | std::unique_lock local_lock{mutex}; | ||
| 309 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 310 | if (timestamp) { | ||
| 311 | u64 timestamp_value = *timestamp; | ||
| 312 | cpu_memory.WriteBlockUnsafe(async_job.query_location + sizeof(u64), | ||
| 313 | &timestamp_value, sizeof(8)); | ||
| 314 | cpu_memory.WriteBlockUnsafe(async_job.query_location, &async_job.value, sizeof(8)); | ||
| 315 | } else { | ||
| 316 | u32 small_value = static_cast<u32>(async_job.value); | ||
| 317 | cpu_memory.WriteBlockUnsafe(async_job.query_location, &small_value, sizeof(u32)); | ||
| 318 | } | ||
| 319 | slot_async_jobs.erase(new_async_job_id); | ||
| 320 | }); | ||
| 321 | rasterizer.SyncOperation(std::move(operation)); | ||
| 265 | } | 322 | } |
| 266 | 323 | ||
| 267 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; | 324 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; |
| 268 | static constexpr unsigned YUZU_PAGEBITS = 12; | 325 | static constexpr unsigned YUZU_PAGEBITS = 12; |
| 269 | 326 | ||
| 327 | SlotVector<AsyncJob> slot_async_jobs; | ||
| 328 | |||
| 270 | VideoCore::RasterizerInterface& rasterizer; | 329 | VideoCore::RasterizerInterface& rasterizer; |
| 330 | Core::Memory::Memory& cpu_memory; | ||
| 271 | 331 | ||
| 272 | mutable std::recursive_mutex mutex; | 332 | mutable std::recursive_mutex mutex; |
| 273 | 333 | ||
| @@ -275,8 +335,8 @@ private: | |||
| 275 | 335 | ||
| 276 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 336 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 277 | 337 | ||
| 278 | std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{}; | 338 | std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; |
| 279 | std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes; | 339 | std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; |
| 280 | }; | 340 | }; |
| 281 | 341 | ||
| 282 | template <class QueryCache, class HostCounter> | 342 | template <class QueryCache, class HostCounter> |
| @@ -295,12 +355,12 @@ public: | |||
| 295 | virtual ~HostCounterBase() = default; | 355 | virtual ~HostCounterBase() = default; |
| 296 | 356 | ||
| 297 | /// Returns the current value of the query. | 357 | /// Returns the current value of the query. |
| 298 | u64 Query() { | 358 | u64 Query(bool async = false) { |
| 299 | if (result) { | 359 | if (result) { |
| 300 | return *result; | 360 | return *result; |
| 301 | } | 361 | } |
| 302 | 362 | ||
| 303 | u64 value = BlockingQuery() + base_result; | 363 | u64 value = BlockingQuery(async) + base_result; |
| 304 | if (dependency) { | 364 | if (dependency) { |
| 305 | value += dependency->Query(); | 365 | value += dependency->Query(); |
| 306 | dependency = nullptr; | 366 | dependency = nullptr; |
| @@ -321,7 +381,7 @@ public: | |||
| 321 | 381 | ||
| 322 | protected: | 382 | protected: |
| 323 | /// Returns the value of query from the backend API blocking as needed. | 383 | /// Returns the value of query from the backend API blocking as needed. |
| 324 | virtual u64 BlockingQuery() const = 0; | 384 | virtual u64 BlockingQuery(bool async = false) const = 0; |
| 325 | 385 | ||
| 326 | private: | 386 | private: |
| 327 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | 387 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. |
| @@ -344,26 +404,23 @@ public: | |||
| 344 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; | 404 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; |
| 345 | 405 | ||
| 346 | /// Flushes the query to guest memory. | 406 | /// Flushes the query to guest memory. |
| 347 | virtual void Flush() { | 407 | virtual u64 Flush(bool async = false) { |
| 348 | // When counter is nullptr it means that it's just been reset. We are supposed to write a | 408 | // When counter is nullptr it means that it's just been reset. We are supposed to write a |
| 349 | // zero in these cases. | 409 | // zero in these cases. |
| 350 | const u64 value = counter ? counter->Query() : 0; | 410 | const u64 value = counter ? counter->Query(async) : 0; |
| 351 | std::memcpy(host_ptr, &value, sizeof(u64)); | 411 | return value; |
| 352 | |||
| 353 | if (timestamp) { | ||
| 354 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 355 | } | ||
| 356 | } | 412 | } |
| 357 | 413 | ||
| 358 | /// Binds a counter to this query. | 414 | /// Binds a counter to this query. |
| 359 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | 415 | std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_) { |
| 416 | std::optional<u64> result{}; | ||
| 360 | if (counter) { | 417 | if (counter) { |
| 361 | // If there's an old counter set it means the query is being rewritten by the game. | 418 | // If there's an old counter set it means the query is being rewritten by the game. |
| 362 | // To avoid losing the data forever, flush here. | 419 | // To avoid losing the data forever, flush here. |
| 363 | Flush(); | 420 | result = std::make_optional(Flush()); |
| 364 | } | 421 | } |
| 365 | counter = std::move(counter_); | 422 | counter = std::move(counter_); |
| 366 | timestamp = timestamp_; | 423 | return result; |
| 367 | } | 424 | } |
| 368 | 425 | ||
| 369 | VAddr GetCpuAddr() const noexcept { | 426 | VAddr GetCpuAddr() const noexcept { |
| @@ -378,6 +435,14 @@ public: | |||
| 378 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | 435 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; |
| 379 | } | 436 | } |
| 380 | 437 | ||
| 438 | void SetAsyncJob(AsyncJobId assigned_async_job_) { | ||
| 439 | assigned_async_job = assigned_async_job_; | ||
| 440 | } | ||
| 441 | |||
| 442 | AsyncJobId GetAsyncJob() const { | ||
| 443 | return assigned_async_job; | ||
| 444 | } | ||
| 445 | |||
| 381 | protected: | 446 | protected: |
| 382 | /// Returns true when querying the counter may potentially block. | 447 | /// Returns true when querying the counter may potentially block. |
| 383 | bool WaitPending() const noexcept { | 448 | bool WaitPending() const noexcept { |
| @@ -393,6 +458,7 @@ private: | |||
| 393 | u8* host_ptr; ///< Writable host pointer. | 458 | u8* host_ptr; ///< Writable host pointer. |
| 394 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | 459 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. |
| 395 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | 460 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. |
| 461 | AsyncJobId assigned_async_job; | ||
| 396 | }; | 462 | }; |
| 397 | 463 | ||
| 398 | } // namespace VideoCommon | 464 | } // namespace VideoCommon |