summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-08-19 21:49:38 +0200
committer: Fernando Sahmkow, 2023-09-23 23:05:30 +0200
commit: 2fea1b8407b66dd0e9ed1776c34dad043e1becf4 (patch)
tree: 4a5ad2bc67d2f07c1fafd7d3d1afb8d8b473fb9a /src
parent: Query Cache: address issues (diff)
download: yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.tar.gz
yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.tar.xz
yuzu-2fea1b8407b66dd0e9ed1776c34dad043e1becf4.zip
Query Cache: Fix guest side sample counting
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 6
-rw-r--r-- src/video_core/query_cache/query_base.h | 19
-rw-r--r-- src/video_core/query_cache/query_cache.h | 46
-rw-r--r-- src/video_core/query_cache/query_stream.h | 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 62
5 files changed, 97 insertions, 46 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 46b9c548a..32d767d85 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() {
586} 586}
587 587
588void Maxwell3D::ProcessCounterReset() { 588void Maxwell3D::ProcessCounterReset() {
589#if ANDROID
590 if (!Settings::IsGPULevelHigh()) {
591 // This is problematic on Android, disable on GPU Normal.
592 return;
593 }
594#endif
595 switch (regs.clear_report_value) { 589 switch (regs.clear_report_value) {
596 case Regs::ClearReport::ZPassPixelCount: 590 case Regs::ClearReport::ZPassPixelCount:
597 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); 591 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 993a13eac..1d786b3a7 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -9,16 +9,15 @@
9namespace VideoCommon { 9namespace VideoCommon {
10 10
11enum class QueryFlagBits : u32 { 11enum class QueryFlagBits : u32 {
12 HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. 12 HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
13 IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host 13 IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host 14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. 15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query 16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query 17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
18 IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. 18 IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. 19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
20 IsFence = 1 << 8, ///< Indicates the query is a fence. 20 IsFence = 1 << 8, ///< Indicates the query is a fence.
21 IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
22}; 21};
23DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) 22DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
24 23
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 042af053c..4b89b5bf6 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -256,30 +256,32 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); 256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); 257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence; 258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
259 std::function<void()> operation( 259 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
260 [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { 260 pointer, pointer_timestamp] {
261 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { 261 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
262 if (!is_synced) [[likely]] {
263 impl->pending_unregister.push_back(query_location);
264 }
265 return;
266 }
267 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
268 UNREACHABLE();
269 return;
270 }
271 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
272 u64 timestamp = impl->gpu.GetTicks();
273 std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
274 std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
275 } else {
276 u32 value = static_cast<u32>(query_base->value);
277 std::memcpy(pointer, &value, sizeof(value));
278 }
279 if (!is_synced) [[likely]] { 262 if (!is_synced) [[likely]] {
280 impl->pending_unregister.push_back(query_location); 263 impl->pending_unregister.push_back(query_location);
281 } 264 }
282 }); 265 return;
266 }
267 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
268 UNREACHABLE();
269 return;
270 }
271 query_base->value += streamer->GetAmmendValue();
272 streamer->SetAccumulationValue(query_base->value);
273 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
274 u64 timestamp = impl->gpu.GetTicks();
275 std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
276 std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
277 } else {
278 u32 value = static_cast<u32>(query_base->value);
279 std::memcpy(pointer, &value, sizeof(value));
280 }
281 if (!is_synced) [[likely]] {
282 impl->pending_unregister.push_back(query_location);
283 }
284 });
283 if (is_fence) { 285 if (is_fence) {
284 impl->rasterizer.SignalFence(std::move(operation)); 286 impl->rasterizer.SignalFence(std::move(operation));
285 } else { 287 } else {
@@ -354,9 +356,9 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
354 if (resume) { 356 if (resume) {
355 impl->runtime.ResumeHostConditionalRendering(); 357 impl->runtime.ResumeHostConditionalRendering();
356 } else { 358 } else {
357 impl->runtime.PauseHostConditionalRendering();
358 CounterClose(VideoCommon::QueryType::ZPassPixelCount64); 359 CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
359 CounterClose(VideoCommon::QueryType::StreamingByteCount); 360 CounterClose(VideoCommon::QueryType::StreamingByteCount);
361 impl->runtime.PauseHostConditionalRendering();
360 } 362 }
361} 363}
362 364
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
index e7aac955b..39da6ac07 100644
--- a/src/video_core/query_cache/query_stream.h
+++ b/src/video_core/query_cache/query_stream.h
@@ -78,6 +78,14 @@ public:
78 return dependence_mask; 78 return dependence_mask;
79 } 79 }
80 80
81 u64 GetAmmendValue() const {
82 return ammend_value;
83 }
84
85 void SetAccumulationValue(u64 new_value) {
86 acumulation_value = new_value;
87 }
88
81protected: 89protected:
82 void MakeDependent(StreamerInterface* depend_on) { 90 void MakeDependent(StreamerInterface* depend_on) {
83 dependence_mask |= 1ULL << depend_on->id; 91 dependence_mask |= 1ULL << depend_on->id;
@@ -87,6 +95,8 @@ protected:
87 const size_t id; 95 const size_t id;
88 u64 dependence_mask; 96 u64 dependence_mask;
89 u64 dependent_mask; 97 u64 dependent_mask;
98 u64 ammend_value{};
99 u64 acumulation_value{};
90}; 100};
91 101
92template <typename QueryType> 102template <typename QueryType>
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index add0c6fb3..2147776f8 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -110,13 +110,16 @@ struct HostSyncValues {
110 110
111class SamplesStreamer : public BaseStreamer { 111class SamplesStreamer : public BaseStreamer {
112public: 112public:
113 explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, 113 explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
114 VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
114 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) 115 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
115 : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, 116 : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
116 memory_allocator{memory_allocator_} { 117 scheduler{scheduler_}, memory_allocator{memory_allocator_} {
117 BuildResolveBuffer(); 118 BuildResolveBuffer();
118 current_bank = nullptr; 119 current_bank = nullptr;
119 current_query = nullptr; 120 current_query = nullptr;
121 ammend_value = 0;
122 acumulation_value = 0;
120 } 123 }
121 124
122 ~SamplesStreamer() = default; 125 ~SamplesStreamer() = default;
@@ -151,6 +154,11 @@ public:
151 PauseCounter(); 154 PauseCounter();
152 } 155 }
153 AbandonCurrentQuery(); 156 AbandonCurrentQuery();
157 std::function<void()> func([this, counts = pending_flush_queries.size()] {
158 ammend_value = 0;
159 acumulation_value = 0;
160 });
161 rasterizer->SyncOperation(std::move(func));
154 } 162 }
155 163
156 void CloseCounter() override { 164 void CloseCounter() override {
@@ -244,7 +252,7 @@ public:
244 } 252 }
245 if (query->size_slots > 1) { 253 if (query->size_slots > 1) {
246 // This is problematic. 254 // This is problematic.
247 UNIMPLEMENTED(); 255 // UNIMPLEMENTED();
248 } 256 }
249 query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; 257 query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
250 auto loc_data = offsets[query->start_bank_id]; 258 auto loc_data = offsets[query->start_bank_id];
@@ -255,16 +263,20 @@ public:
255 }); 263 });
256 } 264 }
257 265
266 ReplicateCurrentQueryIfNeeded();
267 std::function<void()> func([this] { ammend_value = acumulation_value; });
268 rasterizer->SyncOperation(std::move(func));
258 AbandonCurrentQuery(); 269 AbandonCurrentQuery();
259 pending_sync.clear(); 270 pending_sync.clear();
260 } 271 }
261 272
262 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 273 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
263 [[maybe_unused]] std::optional<u32> subreport) override { 274 [[maybe_unused]] std::optional<u32> subreport) override {
275 PauseCounter();
264 auto index = BuildQuery(); 276 auto index = BuildQuery();
265 auto* new_query = GetQuery(index); 277 auto* new_query = GetQuery(index);
266 new_query->guest_address = address; 278 new_query->guest_address = address;
267 new_query->value = 100; 279 new_query->value = 0;
268 new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; 280 new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
269 if (has_timestamp) { 281 if (has_timestamp) {
270 new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; 282 new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
@@ -291,6 +303,7 @@ public:
291 303
292 void PushUnsyncedQueries() override { 304 void PushUnsyncedQueries() override {
293 PauseCounter(); 305 PauseCounter();
306 current_bank->Close();
294 { 307 {
295 std::scoped_lock lk(flush_guard); 308 std::scoped_lock lk(flush_guard);
296 pending_flush_sets.emplace_back(std::move(pending_flush_queries)); 309 pending_flush_sets.emplace_back(std::move(pending_flush_queries));
@@ -429,6 +442,34 @@ private:
429 current_query_id = 0; 442 current_query_id = 0;
430 } 443 }
431 444
445 void ReplicateCurrentQueryIfNeeded() {
446 if (pending_sync.empty()) {
447 return;
448 }
449 if (!current_query) {
450 return;
451 }
452 auto index = BuildQuery();
453 auto* new_query = GetQuery(index);
454 new_query->guest_address = 0;
455 new_query->value = 0;
456 new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
457 new_query->start_bank_id = current_query->start_bank_id;
458 new_query->size_banks = current_query->size_banks;
459 new_query->start_slot = current_query->start_slot;
460 new_query->size_slots = current_query->size_slots;
461 ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
462 bank->AddReference(amount);
463 });
464 pending_flush_queries.push_back(index);
465 std::function<void()> func([this, index] {
466 auto* query = GetQuery(index);
467 query->value += GetAmmendValue();
468 SetAccumulationValue(query->value);
469 Free(index);
470 });
471 }
472
432 void BuildResolveBuffer() { 473 void BuildResolveBuffer() {
433 const VkBufferCreateInfo buffer_ci = { 474 const VkBufferCreateInfo buffer_ci = {
434 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 475 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -448,6 +489,7 @@ private:
448 static constexpr size_t resolve_slots = 8; 489 static constexpr size_t resolve_slots = 8;
449 490
450 QueryCacheRuntime& runtime; 491 QueryCacheRuntime& runtime;
492 VideoCore::RasterizerInterface* rasterizer;
451 const Device& device; 493 const Device& device;
452 Scheduler& scheduler; 494 Scheduler& scheduler;
453 const MemoryAllocator& memory_allocator; 495 const MemoryAllocator& memory_allocator;
@@ -470,6 +512,7 @@ private:
470 size_t current_query_id; 512 size_t current_query_id;
471 VideoCommon::HostQueryBase* current_query; 513 VideoCommon::HostQueryBase* current_query;
472 bool has_started{}; 514 bool has_started{};
515 bool current_unset{};
473 std::mutex flush_guard; 516 std::mutex flush_guard;
474}; 517};
475 518
@@ -677,7 +720,6 @@ public:
677 size_t offset_base = staging_ref.offset; 720 size_t offset_base = staging_ref.offset;
678 for (auto q : pending_flush_queries) { 721 for (auto q : pending_flush_queries) {
679 auto* query = GetQuery(q); 722 auto* query = GetQuery(q);
680 query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
681 auto& bank = bank_pool.GetBank(query->start_bank_id); 723 auto& bank = bank_pool.GetBank(query->start_bank_id);
682 bank.Sync(staging_ref, offset_base, query->start_slot, 1); 724 bank.Sync(staging_ref, offset_base, query->start_slot, 1);
683 offset_base += TFBQueryBank::QUERY_SIZE; 725 offset_base += TFBQueryBank::QUERY_SIZE;
@@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl {
1047 buffer_cache{buffer_cache_}, device{device_}, 1089 buffer_cache{buffer_cache_}, device{device_},
1048 memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, 1090 memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
1049 guest_streamer(0, runtime), 1091 guest_streamer(0, runtime),
1050 sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device, 1092 sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
1051 scheduler, memory_allocator), 1093 device, scheduler, memory_allocator),
1052 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, 1094 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
1053 scheduler, memory_allocator, staging_pool), 1095 scheduler, memory_allocator, staging_pool),
1054 primitives_succeeded_streamer( 1096 primitives_succeeded_streamer(
@@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
1277 return true; 1319 return true;
1278 } 1320 }
1279 } 1321 }
1322 if (!is_in_bc[0] && !is_in_bc[1]) {
1323 // Both queries are in query cache, it's best to just flush.
1324 return false;
1325 }
1280 HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); 1326 HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
1281 return true; 1327 return true;
1282} 1328}