diff options
| author | 2023-08-19 21:49:38 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:30 +0200 | |
| commit | 2fea1b8407b66dd0e9ed1776c34dad043e1becf4 (patch) | |
| tree | 4a5ad2bc67d2f07c1fafd7d3d1afb8d8b473fb9a /src | |
| parent | Query Cache: address issues (diff) | |
| download | yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.tar.gz yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.tar.xz yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.zip | |
Query Cache: Fix guest side sample counting
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_base.h | 19 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache.h | 46 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_stream.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_query_cache.cpp | 62 |
5 files changed, 97 insertions, 46 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 46b9c548a..32d767d85 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 586 | } | 586 | } |
| 587 | 587 | ||
| 588 | void Maxwell3D::ProcessCounterReset() { | 588 | void Maxwell3D::ProcessCounterReset() { |
| 589 | #if ANDROID | ||
| 590 | if (!Settings::IsGPULevelHigh()) { | ||
| 591 | // This is problematic on Android, disable on GPU Normal. | ||
| 592 | return; | ||
| 593 | } | ||
| 594 | #endif | ||
| 595 | switch (regs.clear_report_value) { | 589 | switch (regs.clear_report_value) { |
| 596 | case Regs::ClearReport::ZPassPixelCount: | 590 | case Regs::ClearReport::ZPassPixelCount: |
| 597 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); | 591 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); |
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 993a13eac..1d786b3a7 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -9,16 +9,15 @@ | |||
| 9 | namespace VideoCommon { | 9 | namespace VideoCommon { |
| 10 | 10 | ||
| 11 | enum class QueryFlagBits : u32 { | 11 | enum class QueryFlagBits : u32 { |
| 12 | HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. | 12 | HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. |
| 13 | IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host | 13 | IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host |
| 14 | IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host | 14 | IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host |
| 15 | IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. | 15 | IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. |
| 16 | IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query | 16 | IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query |
| 17 | IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query | 17 | IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query |
| 18 | IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. | 18 | IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. |
| 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. | 19 | IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. |
| 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. | 20 | IsFence = 1 << 8, ///< Indicates the query is a fence. |
| 21 | IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment | ||
| 22 | }; | 21 | }; |
| 23 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | 22 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) |
| 24 | 23 | ||
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 042af053c..4b89b5bf6 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -256,30 +256,32 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 256 | u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | 256 | u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); |
| 257 | u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | 257 | u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); |
| 258 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | 258 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
| 259 | std::function<void()> operation( | 259 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |
| 260 | [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { | 260 | pointer, pointer_timestamp] { |
| 261 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | 261 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { |
| 262 | if (!is_synced) [[likely]] { | ||
| 263 | impl->pending_unregister.push_back(query_location); | ||
| 264 | } | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { | ||
| 268 | UNREACHABLE(); | ||
| 269 | return; | ||
| 270 | } | ||
| 271 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||
| 272 | u64 timestamp = impl->gpu.GetTicks(); | ||
| 273 | std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); | ||
| 274 | std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); | ||
| 275 | } else { | ||
| 276 | u32 value = static_cast<u32>(query_base->value); | ||
| 277 | std::memcpy(pointer, &value, sizeof(value)); | ||
| 278 | } | ||
| 279 | if (!is_synced) [[likely]] { | 262 | if (!is_synced) [[likely]] { |
| 280 | impl->pending_unregister.push_back(query_location); | 263 | impl->pending_unregister.push_back(query_location); |
| 281 | } | 264 | } |
| 282 | }); | 265 | return; |
| 266 | } | ||
| 267 | if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { | ||
| 268 | UNREACHABLE(); | ||
| 269 | return; | ||
| 270 | } | ||
| 271 | query_base->value += streamer->GetAmmendValue(); | ||
| 272 | streamer->SetAccumulationValue(query_base->value); | ||
| 273 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||
| 274 | u64 timestamp = impl->gpu.GetTicks(); | ||
| 274 | std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); | ||
| 276 | std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); | ||
| 277 | } else { | ||
| 278 | u32 value = static_cast<u32>(query_base->value); | ||
| 279 | std::memcpy(pointer, &value, sizeof(value)); | ||
| 280 | } | ||
| 281 | if (!is_synced) [[likely]] { | ||
| 282 | impl->pending_unregister.push_back(query_location); | ||
| 283 | } | ||
| 284 | }); | ||
| 283 | if (is_fence) { | 285 | if (is_fence) { |
| 284 | impl->rasterizer.SignalFence(std::move(operation)); | 286 | impl->rasterizer.SignalFence(std::move(operation)); |
| 285 | } else { | 287 | } else { |
| @@ -354,9 +356,9 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) { | |||
| 354 | if (resume) { | 356 | if (resume) { |
| 355 | impl->runtime.ResumeHostConditionalRendering(); | 357 | impl->runtime.ResumeHostConditionalRendering(); |
| 356 | } else { | 358 | } else { |
| 357 | impl->runtime.PauseHostConditionalRendering(); | ||
| 358 | CounterClose(VideoCommon::QueryType::ZPassPixelCount64); | 359 | CounterClose(VideoCommon::QueryType::ZPassPixelCount64); |
| 359 | CounterClose(VideoCommon::QueryType::StreamingByteCount); | 360 | CounterClose(VideoCommon::QueryType::StreamingByteCount); |
| 361 | impl->runtime.PauseHostConditionalRendering(); | ||
| 360 | } | 362 | } |
| 361 | } | 363 | } |
| 362 | 364 | ||
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index e7aac955b..39da6ac07 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h | |||
| @@ -78,6 +78,14 @@ public: | |||
| 78 | return dependence_mask; | 78 | return dependence_mask; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | u64 GetAmmendValue() const { | ||
| 82 | return ammend_value; | ||
| 83 | } | ||
| 84 | |||
| 85 | void SetAccumulationValue(u64 new_value) { | ||
| 86 | acumulation_value = new_value; | ||
| 87 | } | ||
| 88 | |||
| 81 | protected: | 89 | protected: |
| 82 | void MakeDependent(StreamerInterface* depend_on) { | 90 | void MakeDependent(StreamerInterface* depend_on) { |
| 83 | dependence_mask |= 1ULL << depend_on->id; | 91 | dependence_mask |= 1ULL << depend_on->id; |
| @@ -87,6 +95,8 @@ protected: | |||
| 87 | const size_t id; | 95 | const size_t id; |
| 88 | u64 dependence_mask; | 96 | u64 dependence_mask; |
| 89 | u64 dependent_mask; | 97 | u64 dependent_mask; |
| 98 | u64 ammend_value{}; | ||
| 99 | u64 acumulation_value{}; | ||
| 90 | }; | 100 | }; |
| 91 | 101 | ||
| 92 | template <typename QueryType> | 102 | template <typename QueryType> |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index add0c6fb3..2147776f8 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -110,13 +110,16 @@ struct HostSyncValues { | |||
| 110 | 110 | ||
| 111 | class SamplesStreamer : public BaseStreamer { | 111 | class SamplesStreamer : public BaseStreamer { |
| 112 | public: | 112 | public: |
| 113 | explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, | 113 | explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, |
| 114 | VideoCore::RasterizerInterface* rasterizer_, const Device& device_, | ||
| 114 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) | 115 | Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) |
| 115 | : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, | 116 | : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_}, |
| 116 | memory_allocator{memory_allocator_} { | 117 | scheduler{scheduler_}, memory_allocator{memory_allocator_} { |
| 117 | BuildResolveBuffer(); | 118 | BuildResolveBuffer(); |
| 118 | current_bank = nullptr; | 119 | current_bank = nullptr; |
| 119 | current_query = nullptr; | 120 | current_query = nullptr; |
| 121 | ammend_value = 0; | ||
| 122 | acumulation_value = 0; | ||
| 120 | } | 123 | } |
| 121 | 124 | ||
| 122 | ~SamplesStreamer() = default; | 125 | ~SamplesStreamer() = default; |
| @@ -151,6 +154,11 @@ public: | |||
| 151 | PauseCounter(); | 154 | PauseCounter(); |
| 152 | } | 155 | } |
| 153 | AbandonCurrentQuery(); | 156 | AbandonCurrentQuery(); |
| 157 | std::function<void()> func([this, counts = pending_flush_queries.size()] { | ||
| 158 | ammend_value = 0; | ||
| 159 | acumulation_value = 0; | ||
| 160 | }); | ||
| 161 | rasterizer->SyncOperation(std::move(func)); | ||
| 154 | } | 162 | } |
| 155 | 163 | ||
| 156 | void CloseCounter() override { | 164 | void CloseCounter() override { |
| @@ -244,7 +252,7 @@ public: | |||
| 244 | } | 252 | } |
| 245 | if (query->size_slots > 1) { | 253 | if (query->size_slots > 1) { |
| 246 | // This is problematic. | 254 | // This is problematic. |
| 247 | UNIMPLEMENTED(); | 255 | // UNIMPLEMENTED(); |
| 248 | } | 256 | } |
| 249 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; | 257 | query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; |
| 250 | auto loc_data = offsets[query->start_bank_id]; | 258 | auto loc_data = offsets[query->start_bank_id]; |
| @@ -255,16 +263,20 @@ public: | |||
| 255 | }); | 263 | }); |
| 256 | } | 264 | } |
| 257 | 265 | ||
| 266 | ReplicateCurrentQueryIfNeeded(); | ||
| 267 | std::function<void()> func([this] { ammend_value = acumulation_value; }); | ||
| 268 | rasterizer->SyncOperation(std::move(func)); | ||
| 258 | AbandonCurrentQuery(); | 269 | AbandonCurrentQuery(); |
| 259 | pending_sync.clear(); | 270 | pending_sync.clear(); |
| 260 | } | 271 | } |
| 261 | 272 | ||
| 262 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | 273 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, |
| 263 | [[maybe_unused]] std::optional<u32> subreport) override { | 274 | [[maybe_unused]] std::optional<u32> subreport) override { |
| 275 | PauseCounter(); | ||
| 264 | auto index = BuildQuery(); | 276 | auto index = BuildQuery(); |
| 265 | auto* new_query = GetQuery(index); | 277 | auto* new_query = GetQuery(index); |
| 266 | new_query->guest_address = address; | 278 | new_query->guest_address = address; |
| 267 | new_query->value = 100; | 279 | new_query->value = 0; |
| 268 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | 280 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; |
| 269 | if (has_timestamp) { | 281 | if (has_timestamp) { |
| 270 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; | 282 | new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; |
| @@ -291,6 +303,7 @@ public: | |||
| 291 | 303 | ||
| 292 | void PushUnsyncedQueries() override { | 304 | void PushUnsyncedQueries() override { |
| 293 | PauseCounter(); | 305 | PauseCounter(); |
| 306 | current_bank->Close(); | ||
| 294 | { | 307 | { |
| 295 | std::scoped_lock lk(flush_guard); | 308 | std::scoped_lock lk(flush_guard); |
| 296 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); | 309 | pending_flush_sets.emplace_back(std::move(pending_flush_queries)); |
| @@ -429,6 +442,34 @@ private: | |||
| 429 | current_query_id = 0; | 442 | current_query_id = 0; |
| 430 | } | 443 | } |
| 431 | 444 | ||
| 445 | void ReplicateCurrentQueryIfNeeded() { | ||
| 446 | if (pending_sync.empty()) { | ||
| 447 | return; | ||
| 448 | } | ||
| 449 | if (!current_query) { | ||
| 450 | return; | ||
| 451 | } | ||
| 452 | auto index = BuildQuery(); | ||
| 453 | auto* new_query = GetQuery(index); | ||
| 454 | new_query->guest_address = 0; | ||
| 455 | new_query->value = 0; | ||
| 456 | new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; | ||
| 457 | new_query->start_bank_id = current_query->start_bank_id; | ||
| 458 | new_query->size_banks = current_query->size_banks; | ||
| 459 | new_query->start_slot = current_query->start_slot; | ||
| 460 | new_query->size_slots = current_query->size_slots; | ||
| 461 | ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) { | ||
| 462 | bank->AddReference(amount); | ||
| 463 | }); | ||
| 464 | pending_flush_queries.push_back(index); | ||
| 465 | std::function<void()> func([this, index] { | ||
| 466 | auto* query = GetQuery(index); | ||
| 467 | query->value += GetAmmendValue(); | ||
| 468 | SetAccumulationValue(query->value); | ||
| 469 | Free(index); | ||
| 470 | }); | ||
| 471 | } | ||
| 472 | |||
| 432 | void BuildResolveBuffer() { | 473 | void BuildResolveBuffer() { |
| 433 | const VkBufferCreateInfo buffer_ci = { | 474 | const VkBufferCreateInfo buffer_ci = { |
| 434 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 475 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| @@ -448,6 +489,7 @@ private: | |||
| 448 | static constexpr size_t resolve_slots = 8; | 489 | static constexpr size_t resolve_slots = 8; |
| 449 | 490 | ||
| 450 | QueryCacheRuntime& runtime; | 491 | QueryCacheRuntime& runtime; |
| 492 | VideoCore::RasterizerInterface* rasterizer; | ||
| 451 | const Device& device; | 493 | const Device& device; |
| 452 | Scheduler& scheduler; | 494 | Scheduler& scheduler; |
| 453 | const MemoryAllocator& memory_allocator; | 495 | const MemoryAllocator& memory_allocator; |
| @@ -470,6 +512,7 @@ private: | |||
| 470 | size_t current_query_id; | 512 | size_t current_query_id; |
| 471 | VideoCommon::HostQueryBase* current_query; | 513 | VideoCommon::HostQueryBase* current_query; |
| 472 | bool has_started{}; | 514 | bool has_started{}; |
| 515 | bool current_unset{}; | ||
| 473 | std::mutex flush_guard; | 516 | std::mutex flush_guard; |
| 474 | }; | 517 | }; |
| 475 | 518 | ||
| @@ -677,7 +720,6 @@ public: | |||
| 677 | size_t offset_base = staging_ref.offset; | 720 | size_t offset_base = staging_ref.offset; |
| 678 | for (auto q : pending_flush_queries) { | 721 | for (auto q : pending_flush_queries) { |
| 679 | auto* query = GetQuery(q); | 722 | auto* query = GetQuery(q); |
| 680 | query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush; | ||
| 681 | auto& bank = bank_pool.GetBank(query->start_bank_id); | 723 | auto& bank = bank_pool.GetBank(query->start_bank_id); |
| 682 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); | 724 | bank.Sync(staging_ref, offset_base, query->start_slot, 1); |
| 683 | offset_base += TFBQueryBank::QUERY_SIZE; | 725 | offset_base += TFBQueryBank::QUERY_SIZE; |
| @@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl { | |||
| 1047 | buffer_cache{buffer_cache_}, device{device_}, | 1089 | buffer_cache{buffer_cache_}, device{device_}, |
| 1048 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, | 1090 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, |
| 1049 | guest_streamer(0, runtime), | 1091 | guest_streamer(0, runtime), |
| 1050 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device, | 1092 | sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer, |
| 1051 | scheduler, memory_allocator), | 1093 | device, scheduler, memory_allocator), |
| 1052 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, | 1094 | tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, |
| 1053 | scheduler, memory_allocator, staging_pool), | 1095 | scheduler, memory_allocator, staging_pool), |
| 1054 | primitives_succeeded_streamer( | 1096 | primitives_succeeded_streamer( |
| @@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku | |||
| 1277 | return true; | 1319 | return true; |
| 1278 | } | 1320 | } |
| 1279 | } | 1321 | } |
| 1322 | if (!is_in_bc[0] && !is_in_bc[1]) { | ||
| 1323 | // Both queries are in query cache, it's best to just flush. | ||
| 1324 | return false; | ||
| 1325 | } | ||
| 1280 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); | 1326 | HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); |
| 1281 | return true; | 1327 | return true; |
| 1282 | } | 1328 | } |