diff options
Diffstat (limited to 'src/video_core/query_cache.h')
| -rw-r--r-- | src/video_core/query_cache.h | 137 |
1 files changed, 113 insertions, 24 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 8906ba6d8..941de95c1 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <functional> | ||
| 9 | #include <iterator> | 10 | #include <iterator> |
| 10 | #include <list> | 11 | #include <list> |
| 11 | #include <memory> | 12 | #include <memory> |
| @@ -17,13 +18,19 @@ | |||
| 17 | 18 | ||
| 18 | #include "common/assert.h" | 19 | #include "common/assert.h" |
| 19 | #include "common/settings.h" | 20 | #include "common/settings.h" |
| 21 | #include "core/memory.h" | ||
| 20 | #include "video_core/control/channel_state_cache.h" | 22 | #include "video_core/control/channel_state_cache.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 23 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/memory_manager.h" | 24 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/rasterizer_interface.h" | 25 | #include "video_core/rasterizer_interface.h" |
| 26 | #include "video_core/texture_cache/slot_vector.h" | ||
| 24 | 27 | ||
| 25 | namespace VideoCommon { | 28 | namespace VideoCommon { |
| 26 | 29 | ||
| 30 | using AsyncJobId = SlotId; | ||
| 31 | |||
| 32 | static constexpr AsyncJobId NULL_ASYNC_JOB_ID{0}; | ||
| 33 | |||
| 27 | template <class QueryCache, class HostCounter> | 34 | template <class QueryCache, class HostCounter> |
| 28 | class CounterStreamBase { | 35 | class CounterStreamBase { |
| 29 | public: | 36 | public: |
| @@ -93,9 +100,13 @@ private: | |||
| 93 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | 100 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> |
| 94 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | 101 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { |
| 95 | public: | 102 | public: |
| 96 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_) | 103 | explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, |
| 97 | : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | 104 | Core::Memory::Memory& cpu_memory_) |
| 98 | VideoCore::QueryType::SamplesPassed}}} {} | 105 | : rasterizer{rasterizer_}, |
| 106 | cpu_memory{cpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this), | ||
| 107 | VideoCore::QueryType::SamplesPassed}}} { | ||
| 108 | (void)slot_async_jobs.insert(); // Null value | ||
| 109 | } | ||
| 99 | 110 | ||
| 100 | void InvalidateRegion(VAddr addr, std::size_t size) { | 111 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 101 | std::unique_lock lock{mutex}; | 112 | std::unique_lock lock{mutex}; |
| @@ -126,10 +137,15 @@ public: | |||
| 126 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 137 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); |
| 127 | } | 138 | } |
| 128 | 139 | ||
| 129 | query->BindCounter(Stream(type).Current(), timestamp); | 140 | auto result = query->BindCounter(Stream(type).Current(), timestamp); |
| 130 | if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 141 | if (result) { |
| 131 | AsyncFlushQuery(*cpu_addr); | 142 | auto async_job_id = query->GetAsyncJob(); |
| 143 | auto& async_job = slot_async_jobs[async_job_id]; | ||
| 144 | async_job.collected = true; | ||
| 145 | async_job.value = *result; | ||
| 146 | query->SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 132 | } | 147 | } |
| 148 | AsyncFlushQuery(query, timestamp, lock); | ||
| 133 | } | 149 | } |
| 134 | 150 | ||
| 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 151 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| @@ -173,15 +189,18 @@ public: | |||
| 173 | } | 189 | } |
| 174 | 190 | ||
| 175 | void CommitAsyncFlushes() { | 191 | void CommitAsyncFlushes() { |
| 192 | std::unique_lock lock{mutex}; | ||
| 176 | committed_flushes.push_back(uncommitted_flushes); | 193 | committed_flushes.push_back(uncommitted_flushes); |
| 177 | uncommitted_flushes.reset(); | 194 | uncommitted_flushes.reset(); |
| 178 | } | 195 | } |
| 179 | 196 | ||
| 180 | bool HasUncommittedFlushes() const { | 197 | bool HasUncommittedFlushes() const { |
| 198 | std::unique_lock lock{mutex}; | ||
| 181 | return uncommitted_flushes != nullptr; | 199 | return uncommitted_flushes != nullptr; |
| 182 | } | 200 | } |
| 183 | 201 | ||
| 184 | bool ShouldWaitAsyncFlushes() const { | 202 | bool ShouldWaitAsyncFlushes() const { |
| 203 | std::unique_lock lock{mutex}; | ||
| 185 | if (committed_flushes.empty()) { | 204 | if (committed_flushes.empty()) { |
| 186 | return false; | 205 | return false; |
| 187 | } | 206 | } |
| @@ -189,6 +208,7 @@ public: | |||
| 189 | } | 208 | } |
| 190 | 209 | ||
| 191 | void PopAsyncFlushes() { | 210 | void PopAsyncFlushes() { |
| 211 | std::unique_lock lock{mutex}; | ||
| 192 | if (committed_flushes.empty()) { | 212 | if (committed_flushes.empty()) { |
| 193 | return; | 213 | return; |
| 194 | } | 214 | } |
| @@ -197,15 +217,25 @@ public: | |||
| 197 | committed_flushes.pop_front(); | 217 | committed_flushes.pop_front(); |
| 198 | return; | 218 | return; |
| 199 | } | 219 | } |
| 200 | for (VAddr query_address : *flush_list) { | 220 | for (AsyncJobId async_job_id : *flush_list) { |
| 201 | FlushAndRemoveRegion(query_address, 4); | 221 | AsyncJob& async_job = slot_async_jobs[async_job_id]; |
| 222 | if (!async_job.collected) { | ||
| 223 | FlushAndRemoveRegion(async_job.query_location, 2, true); | ||
| 224 | } | ||
| 202 | } | 225 | } |
| 203 | committed_flushes.pop_front(); | 226 | committed_flushes.pop_front(); |
| 204 | } | 227 | } |
| 205 | 228 | ||
| 206 | private: | 229 | private: |
| 230 | struct AsyncJob { | ||
| 231 | bool collected = false; | ||
| 232 | u64 value = 0; | ||
| 233 | VAddr query_location = 0; | ||
| 234 | std::optional<u64> timestamp{}; | ||
| 235 | }; | ||
| 236 | |||
| 207 | /// Flushes a memory range to guest memory and removes it from the cache. | 237 | /// Flushes a memory range to guest memory and removes it from the cache. |
| 208 | void FlushAndRemoveRegion(VAddr addr, std::size_t size) { | 238 | void FlushAndRemoveRegion(VAddr addr, std::size_t size, bool async = false) { |
| 209 | const u64 addr_begin = addr; | 239 | const u64 addr_begin = addr; |
| 210 | const u64 addr_end = addr_begin + size; | 240 | const u64 addr_end = addr_begin + size; |
| 211 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { | 241 | const auto in_range = [addr_begin, addr_end](const CachedQuery& query) { |
| @@ -226,7 +256,16 @@ private: | |||
| 226 | continue; | 256 | continue; |
| 227 | } | 257 | } |
| 228 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); | 258 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); |
| 229 | query.Flush(); | 259 | AsyncJobId async_job_id = query.GetAsyncJob(); |
| 260 | auto flush_result = query.Flush(async); | ||
| 261 | if (async_job_id == NULL_ASYNC_JOB_ID) { | ||
| 262 | ASSERT_MSG(false, "This should not be reachable at all"); | ||
| 263 | continue; | ||
| 264 | } | ||
| 265 | AsyncJob& async_job = slot_async_jobs[async_job_id]; | ||
| 266 | async_job.collected = true; | ||
| 267 | async_job.value = flush_result; | ||
| 268 | query.SetAsyncJob(NULL_ASYNC_JOB_ID); | ||
| 230 | } | 269 | } |
| 231 | std::erase_if(contents, in_range); | 270 | std::erase_if(contents, in_range); |
| 232 | } | 271 | } |
| @@ -253,26 +292,60 @@ private: | |||
| 253 | return found != std::end(contents) ? &*found : nullptr; | 292 | return found != std::end(contents) ? &*found : nullptr; |
| 254 | } | 293 | } |
| 255 | 294 | ||
| 256 | void AsyncFlushQuery(VAddr addr) { | 295 | void AsyncFlushQuery(CachedQuery* query, std::optional<u64> timestamp, |
| 257 | if (!uncommitted_flushes) { | 296 | std::unique_lock<std::recursive_mutex>& lock) { |
| 258 | uncommitted_flushes = std::make_shared<std::vector<VAddr>>(); | 297 | const AsyncJobId new_async_job_id = slot_async_jobs.insert(); |
| 298 | { | ||
| 299 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 300 | query->SetAsyncJob(new_async_job_id); | ||
| 301 | async_job.query_location = query->GetCpuAddr(); | ||
| 302 | async_job.collected = false; | ||
| 303 | |||
| 304 | if (!uncommitted_flushes) { | ||
| 305 | uncommitted_flushes = std::make_shared<std::vector<AsyncJobId>>(); | ||
| 306 | } | ||
| 307 | uncommitted_flushes->push_back(new_async_job_id); | ||
| 259 | } | 308 | } |
| 260 | uncommitted_flushes->push_back(addr); | 309 | lock.unlock(); |
| 310 | std::function<void()> operation([this, new_async_job_id, timestamp] { | ||
| 311 | std::unique_lock local_lock{mutex}; | ||
| 312 | AsyncJob& async_job = slot_async_jobs[new_async_job_id]; | ||
| 313 | u64 value = async_job.value; | ||
| 314 | VAddr address = async_job.query_location; | ||
| 315 | slot_async_jobs.erase(new_async_job_id); | ||
| 316 | local_lock.unlock(); | ||
| 317 | if (timestamp) { | ||
| 318 | u64 timestamp_value = *timestamp; | ||
| 319 | cpu_memory.WriteBlockUnsafe(address + sizeof(u64), &timestamp_value, sizeof(u64)); | ||
| 320 | cpu_memory.WriteBlockUnsafe(address, &value, sizeof(u64)); | ||
| 321 | rasterizer.InvalidateRegion(address, sizeof(u64) * 2, | ||
| 322 | VideoCommon::CacheType::NoQueryCache); | ||
| 323 | } else { | ||
| 324 | u32 small_value = static_cast<u32>(value); | ||
| 325 | cpu_memory.WriteBlockUnsafe(address, &small_value, sizeof(u32)); | ||
| 326 | rasterizer.InvalidateRegion(address, sizeof(u32), | ||
| 327 | VideoCommon::CacheType::NoQueryCache); | ||
| 328 | } | ||
| 329 | }); | ||
| 330 | rasterizer.SyncOperation(std::move(operation)); | ||
| 261 | } | 331 | } |
| 262 | 332 | ||
| 263 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; | 333 | static constexpr std::uintptr_t YUZU_PAGESIZE = 4096; |
| 264 | static constexpr unsigned YUZU_PAGEBITS = 12; | 334 | static constexpr unsigned YUZU_PAGEBITS = 12; |
| 265 | 335 | ||
| 336 | SlotVector<AsyncJob> slot_async_jobs; | ||
| 337 | |||
| 266 | VideoCore::RasterizerInterface& rasterizer; | 338 | VideoCore::RasterizerInterface& rasterizer; |
| 339 | Core::Memory::Memory& cpu_memory; | ||
| 267 | 340 | ||
| 268 | std::recursive_mutex mutex; | 341 | mutable std::recursive_mutex mutex; |
| 269 | 342 | ||
| 270 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | 343 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; |
| 271 | 344 | ||
| 272 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 345 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 273 | 346 | ||
| 274 | std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{}; | 347 | std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; |
| 275 | std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes; | 348 | std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; |
| 276 | }; | 349 | }; |
| 277 | 350 | ||
| 278 | template <class QueryCache, class HostCounter> | 351 | template <class QueryCache, class HostCounter> |
| @@ -291,12 +364,12 @@ public: | |||
| 291 | virtual ~HostCounterBase() = default; | 364 | virtual ~HostCounterBase() = default; |
| 292 | 365 | ||
| 293 | /// Returns the current value of the query. | 366 | /// Returns the current value of the query. |
| 294 | u64 Query() { | 367 | u64 Query(bool async = false) { |
| 295 | if (result) { | 368 | if (result) { |
| 296 | return *result; | 369 | return *result; |
| 297 | } | 370 | } |
| 298 | 371 | ||
| 299 | u64 value = BlockingQuery() + base_result; | 372 | u64 value = BlockingQuery(async) + base_result; |
| 300 | if (dependency) { | 373 | if (dependency) { |
| 301 | value += dependency->Query(); | 374 | value += dependency->Query(); |
| 302 | dependency = nullptr; | 375 | dependency = nullptr; |
| @@ -317,7 +390,7 @@ public: | |||
| 317 | 390 | ||
| 318 | protected: | 391 | protected: |
| 319 | /// Returns the value of query from the backend API blocking as needed. | 392 | /// Returns the value of query from the backend API blocking as needed. |
| 320 | virtual u64 BlockingQuery() const = 0; | 393 | virtual u64 BlockingQuery(bool async = false) const = 0; |
| 321 | 394 | ||
| 322 | private: | 395 | private: |
| 323 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | 396 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. |
| @@ -340,26 +413,33 @@ public: | |||
| 340 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; | 413 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; |
| 341 | 414 | ||
| 342 | /// Flushes the query to guest memory. | 415 | /// Flushes the query to guest memory. |
| 343 | virtual void Flush() { | 416 | virtual u64 Flush(bool async = false) { |
| 344 | // When counter is nullptr it means that it's just been reset. We are supposed to write a | 417 | // When counter is nullptr it means that it's just been reset. We are supposed to write a |
| 345 | // zero in these cases. | 418 | // zero in these cases. |
| 346 | const u64 value = counter ? counter->Query() : 0; | 419 | const u64 value = counter ? counter->Query(async) : 0; |
| 420 | if (async) { | ||
| 421 | return value; | ||
| 422 | } | ||
| 347 | std::memcpy(host_ptr, &value, sizeof(u64)); | 423 | std::memcpy(host_ptr, &value, sizeof(u64)); |
| 348 | 424 | ||
| 349 | if (timestamp) { | 425 | if (timestamp) { |
| 350 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | 426 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); |
| 351 | } | 427 | } |
| 428 | return value; | ||
| 352 | } | 429 | } |
| 353 | 430 | ||
| 354 | /// Binds a counter to this query. | 431 | /// Binds a counter to this query. |
| 355 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | 432 | std::optional<u64> BindCounter(std::shared_ptr<HostCounter> counter_, |
| 433 | std::optional<u64> timestamp_) { | ||
| 434 | std::optional<u64> result{}; | ||
| 356 | if (counter) { | 435 | if (counter) { |
| 357 | // If there's an old counter set it means the query is being rewritten by the game. | 436 | // If there's an old counter set it means the query is being rewritten by the game. |
| 358 | // To avoid losing the data forever, flush here. | 437 | // To avoid losing the data forever, flush here. |
| 359 | Flush(); | 438 | result = std::make_optional(Flush()); |
| 360 | } | 439 | } |
| 361 | counter = std::move(counter_); | 440 | counter = std::move(counter_); |
| 362 | timestamp = timestamp_; | 441 | timestamp = timestamp_; |
| 442 | return result; | ||
| 363 | } | 443 | } |
| 364 | 444 | ||
| 365 | VAddr GetCpuAddr() const noexcept { | 445 | VAddr GetCpuAddr() const noexcept { |
| @@ -374,6 +454,14 @@ public: | |||
| 374 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | 454 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; |
| 375 | } | 455 | } |
| 376 | 456 | ||
| 457 | void SetAsyncJob(AsyncJobId assigned_async_job_) { | ||
| 458 | assigned_async_job = assigned_async_job_; | ||
| 459 | } | ||
| 460 | |||
| 461 | AsyncJobId GetAsyncJob() const { | ||
| 462 | return assigned_async_job; | ||
| 463 | } | ||
| 464 | |||
| 377 | protected: | 465 | protected: |
| 378 | /// Returns true when querying the counter may potentially block. | 466 | /// Returns true when querying the counter may potentially block. |
| 379 | bool WaitPending() const noexcept { | 467 | bool WaitPending() const noexcept { |
| @@ -389,6 +477,7 @@ private: | |||
| 389 | u8* host_ptr; ///< Writable host pointer. | 477 | u8* host_ptr; ///< Writable host pointer. |
| 390 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | 478 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. |
| 391 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | 479 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. |
| 480 | AsyncJobId assigned_async_job; | ||
| 392 | }; | 481 | }; |
| 393 | 482 | ||
| 394 | } // namespace VideoCommon | 483 | } // namespace VideoCommon |