summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-08-06 09:38:16 +0200
committerGravatar Fernando Sahmkow2023-09-23 23:05:30 +0200
commit282ae8fa51e060e6d4ef026b734aa871b1b9331e (patch)
tree3bc4603b6add0582315dc65544f1986427e4182d
parentQueryCache: Implement dependant queries. (diff)
downloadyuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.gz
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.xz
yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.zip
Query Cache: address issues
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h5
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp6
-rw-r--r--src/video_core/engines/puller.cpp6
-rw-r--r--src/video_core/fence_manager.h14
-rw-r--r--src/video_core/query_cache/bank_base.h16
-rw-r--r--src/video_core/query_cache/query_base.h44
-rw-r--r--src/video_core/query_cache/query_cache.h66
-rw-r--r--src/video_core/query_cache/query_cache_base.h8
-rw-r--r--src/video_core/query_cache/query_stream.h22
-rw-r--r--src/video_core/rasterizer_interface.h5
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp203
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp27
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h12
21 files changed, 270 insertions, 214 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f91b7d1e4..9e90c587c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
276} 276}
277 277
278template <class P> 278template <class P>
279std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, 279std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(
280 ObtainBufferSynchronize sync_info, 280 VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) {
281 ObtainBufferOperation post_op) {
282 const BufferId buffer_id = FindBuffer(cpu_addr, size); 281 const BufferId buffer_id = FindBuffer(cpu_addr, size);
283 Buffer& buffer = slot_buffers[buffer_id]; 282 Buffer& buffer = slot_buffers[buffer_id];
284 283
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 9507071e5..c4f6e8d12 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -297,8 +297,8 @@ public:
297 ObtainBufferOperation post_op); 297 ObtainBufferOperation post_op);
298 298
299 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, 299 [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size,
300 ObtainBufferSynchronize sync_info, 300 ObtainBufferSynchronize sync_info,
301 ObtainBufferOperation post_op); 301 ObtainBufferOperation post_op);
302 void FlushCachedWrites(); 302 void FlushCachedWrites();
303 303
304 /// Return true when there are uncommitted buffers to be downloaded 304 /// Return true when there are uncommitted buffers to be downloaded
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 922c399e6..46b9c548a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() {
596 case Regs::ClearReport::ZPassPixelCount: 596 case Regs::ClearReport::ZPassPixelCount:
597 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); 597 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
598 break; 598 break;
599 case Regs::ClearReport::PrimitivesGenerated:
600 rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
601 break;
602 case Regs::ClearReport::VtgPrimitivesOut:
603 rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount);
604 break;
605 default: 599 default:
606 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); 600 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
607 break; 601 break;
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 582738234..8dd34c04a 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
82 if (op == GpuSemaphoreOperation::WriteLong) { 82 if (op == GpuSemaphoreOperation::WriteLong) {
83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
84 const u32 payload = regs.semaphore_sequence; 84 const u32 payload = regs.semaphore_sequence;
85 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); 85 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
86 VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
86 } else { 87 } else {
87 do { 88 do {
88 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; 89 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
117void Puller::ProcessSemaphoreRelease() { 118void Puller::ProcessSemaphoreRelease() {
118 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 119 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
119 const u32 payload = regs.semaphore_release; 120 const u32 payload = regs.semaphore_release;
120 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); 121 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
122 VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
121} 123}
122 124
123void Puller::ProcessSemaphoreAcquire() { 125void Puller::ProcessSemaphoreAcquire() {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8459a3092..805a89900 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -55,6 +55,9 @@ public:
55 55
56 // Unlike other fences, this one doesn't 56 // Unlike other fences, this one doesn't
57 void SignalOrdering() { 57 void SignalOrdering() {
58 if constexpr (!can_async_check) {
59 TryReleasePendingFences<false>();
60 }
58 std::scoped_lock lock{buffer_cache.mutex}; 61 std::scoped_lock lock{buffer_cache.mutex};
59 buffer_cache.AccumulateFlushes(); 62 buffer_cache.AccumulateFlushes();
60 } 63 }
@@ -104,13 +107,9 @@ public:
104 SignalFence(std::move(func)); 107 SignalFence(std::move(func));
105 } 108 }
106 109
107 void WaitPendingFences(bool force) { 110 void WaitPendingFences([[maybe_unused]] bool force) {
108 if constexpr (!can_async_check) { 111 if constexpr (!can_async_check) {
109 if (force) { 112 TryReleasePendingFences<true>();
110 TryReleasePendingFences<true>();
111 } else {
112 TryReleasePendingFences<false>();
113 }
114 } else { 113 } else {
115 if (!force) { 114 if (!force) {
116 return; 115 return;
@@ -125,7 +124,8 @@ public:
125 }); 124 });
126 SignalFence(std::move(func)); 125 SignalFence(std::move(func));
127 std::unique_lock lk(wait_mutex); 126 std::unique_lock lk(wait_mutex);
128 wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); 127 wait_cv.wait(
128 lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); });
129 } 129 }
130 } 130 }
131 131
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h
index 4246a609d..420927091 100644
--- a/src/video_core/query_cache/bank_base.h
+++ b/src/video_core/query_cache/bank_base.h
@@ -7,21 +7,19 @@
7#include <deque> 7#include <deque>
8#include <utility> 8#include <utility>
9 9
10
11#include "common/common_types.h" 10#include "common/common_types.h"
12 11
13namespace VideoCommon { 12namespace VideoCommon {
14 13
15class BankBase { 14class BankBase {
16protected: 15protected:
17 const size_t base_bank_size; 16 const size_t base_bank_size{};
18 size_t bank_size; 17 size_t bank_size{};
19 std::atomic<size_t> references; 18 std::atomic<size_t> references{};
20 size_t current_slot; 19 size_t current_slot{};
21 20
22public: 21public:
23 BankBase(size_t bank_size_) 22 explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {}
24 : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
25 23
26 virtual ~BankBase() = default; 24 virtual ~BankBase() = default;
27 25
@@ -58,11 +56,11 @@ public:
58 bank_size = current_slot; 56 bank_size = current_slot;
59 } 57 }
60 58
61 constexpr bool IsClosed() { 59 bool IsClosed() const {
62 return current_slot >= bank_size; 60 return current_slot >= bank_size;
63 } 61 }
64 62
65 bool IsDead() { 63 bool IsDead() const {
66 return IsClosed() && references == 0; 64 return IsClosed() && references == 0;
67 } 65 }
68}; 66};
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 0ae23af9f..993a13eac 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -9,28 +9,28 @@
9namespace VideoCommon { 9namespace VideoCommon {
10 10
11enum class QueryFlagBits : u32 { 11enum class QueryFlagBits : u32 {
12 HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. 12 HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
13 IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host 13 IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host 14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. 15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query 16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query 17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
18 IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. 18 IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. 19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
20 IsFence = 1 << 8, ///< Indicates the query is a fence. 20 IsFence = 1 << 8, ///< Indicates the query is a fence.
21 IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment 21 IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
22}; 22};
23DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) 23DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
24 24
25class QueryBase { 25class QueryBase {
26public: 26public:
27 VAddr guest_address; 27 VAddr guest_address{};
28 QueryFlagBits flags; 28 QueryFlagBits flags{};
29 u64 value; 29 u64 value{};
30 30
31protected: 31protected:
32 // Default constructor 32 // Default constructor
33 QueryBase() : guest_address(0), flags{}, value{} {} 33 QueryBase() = default;
34 34
35 // Parameterized constructor 35 // Parameterized constructor
36 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) 36 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
@@ -51,23 +51,21 @@ public:
51class HostQueryBase : public QueryBase { 51class HostQueryBase : public QueryBase {
52public: 52public:
53 // Default constructor 53 // Default constructor
54 HostQueryBase() 54 HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}
55 : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
56 size_banks{}, start_slot{}, size_slots{} {}
57 55
58 // Parameterized constructor 56 // Parameterized constructor
59 HostQueryBase(bool isLong, VAddr address) 57 HostQueryBase(bool has_timestamp, VAddr address)
60 : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, 58 : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
61 start_slot{}, size_slots{} { 59 start_slot{}, size_slots{} {
62 if (isLong) { 60 if (has_timestamp) {
63 flags |= QueryFlagBits::HasTimestamp; 61 flags |= QueryFlagBits::HasTimestamp;
64 } 62 }
65 } 63 }
66 64
67 u32 start_bank_id; 65 u32 start_bank_id{};
68 u32 size_banks; 66 u32 size_banks{};
69 size_t start_slot; 67 size_t start_slot{};
70 size_t size_slots; 68 size_t size_slots{};
71}; 69};
72 70
73} // namespace VideoCommon \ No newline at end of file 71} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index f1393d5c7..042af053c 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -54,7 +54,7 @@ public:
54 return new_id; 54 return new_id;
55 } 55 }
56 56
57 bool HasPendingSync() override { 57 bool HasPendingSync() const override {
58 return !pending_sync.empty(); 58 return !pending_sync.empty();
59 } 59 }
60 60
@@ -71,8 +71,10 @@ public:
71 continue; 71 continue;
72 } 72 }
73 query.flags |= QueryFlagBits::IsHostSynced; 73 query.flags |= QueryFlagBits::IsHostSynced;
74 sync_values.emplace_back(query.guest_address, query.value, 74 sync_values.emplace_back(SyncValuesStruct{
75 True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); 75 .address = query.guest_address,
76 .value = query.value,
77 .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)});
76 } 78 }
77 pending_sync.clear(); 79 pending_sync.clear();
78 if (sync_values.size() > 0) { 80 if (sync_values.size() > 0) {
@@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> {
90public: 92public:
91 using RuntimeType = typename Traits::RuntimeType; 93 using RuntimeType = typename Traits::RuntimeType;
92 94
93 StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} 95 StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_)
96 : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {}
94 97
95 ~StubStreamer() override = default; 98 ~StubStreamer() override = default;
96 99
97 size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, 100 size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
98 std::optional<u32> subreport = std::nullopt) override { 101 std::optional<u32> subreport = std::nullopt) override {
99 size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); 102 size_t new_id =
103 GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport);
100 return new_id; 104 return new_id;
101 } 105 }
106
107private:
108 u32 stub_value;
102}; 109};
103 110
104template <typename Traits> 111template <typename Traits>
@@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
113 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { 120 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
114 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); 121 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
115 if (streamers[i]) { 122 if (streamers[i]) {
116 streamer_mask |= 1ULL << i; 123 streamer_mask |= 1ULL << streamers[i]->GetId();
117 } 124 }
118 } 125 }
119 } 126 }
@@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
152 QueryCacheBase<Traits>* owner; 159 QueryCacheBase<Traits>* owner;
153 VideoCore::RasterizerInterface& rasterizer; 160 VideoCore::RasterizerInterface& rasterizer;
154 Core::Memory::Memory& cpu_memory; 161 Core::Memory::Memory& cpu_memory;
155 Traits::RuntimeType& runtime; 162 RuntimeType& runtime;
156 Tegra::GPU& gpu; 163 Tegra::GPU& gpu;
157 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; 164 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
158 u64 streamer_mask; 165 u64 streamer_mask;
@@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
223 const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); 230 const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
224 size_t streamer_id = static_cast<size_t>(counter_type); 231 size_t streamer_id = static_cast<size_t>(counter_type);
225 auto* streamer = impl->streamers[streamer_id]; 232 auto* streamer = impl->streamers[streamer_id];
226 if (!streamer) [[unlikely]] { 233 if (streamer == nullptr) [[unlikely]] {
227 if (has_timestamp) { 234 counter_type = QueryType::Payload;
228 u64 timestamp = impl->gpu.GetTicks(); 235 payload = 1U;
229 gpu_memory->Write<u64>(addr + 8, timestamp); 236 streamer_id = static_cast<size_t>(counter_type);
230 gpu_memory->Write<u64>(addr, 1ULL); 237 streamer = impl->streamers[streamer_id];
231 } else {
232 gpu_memory->Write<u32>(addr, 1U);
233 }
234 return;
235 } 238 }
236 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); 239 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
237 if (!cpu_addr_opt) [[unlikely]] { 240 if (!cpu_addr_opt) [[unlikely]] {
@@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
403 impl->runtime.EndHostConditionalRendering(); 406 impl->runtime.EndHostConditionalRendering();
404 return false; 407 return false;
405 } 408 }
406 /*if (!Settings::IsGPULevelHigh()) {
407 impl->runtime.EndHostConditionalRendering();
408 return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
409 VideoCommon::CacheType::BufferCache |
410 VideoCommon::CacheType::QueryCache);
411 }*/
412 const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); 409 const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
413 const GPUVAddr address = regs.render_enable.Address(); 410 const GPUVAddr address = regs.render_enable.Address();
414 switch (mode) { 411 switch (mode) {
@@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
442// Async downloads 439// Async downloads
443template <typename Traits> 440template <typename Traits>
444void QueryCacheBase<Traits>::CommitAsyncFlushes() { 441void QueryCacheBase<Traits>::CommitAsyncFlushes() {
442 // Make sure to have the results synced in Host.
443 NotifyWFI();
444
445 u64 mask{}; 445 u64 mask{};
446 { 446 {
447 std::scoped_lock lk(impl->flush_guard); 447 std::scoped_lock lk(impl->flush_guard);
@@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() {
458 if (mask == 0) { 458 if (mask == 0) {
459 return; 459 return;
460 } 460 }
461 impl->ForEachStreamerIn(mask, 461 u64 ran_mask = ~mask;
462 [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); 462 while (mask) {
463 impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
464 u64 dep_mask = streamer->GetDependentMask();
465 if ((dep_mask & ~ran_mask) != 0) {
466 return;
467 }
468 u64 index = streamer->GetId();
469 ran_mask |= (1ULL << index);
470 mask &= ~(1ULL << index);
471 streamer->PushUnsyncedQueries();
472 });
473 }
463} 474}
464 475
465template <typename Traits> 476template <typename Traits>
@@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
489 if (mask == 0) { 500 if (mask == 0) {
490 return; 501 return;
491 } 502 }
492 u64 ran_mask = 0; 503 u64 ran_mask = ~mask;
493 u64 next_phase = 0;
494 while (mask) { 504 while (mask) {
495 impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { 505 impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
496 u64 dep_mask = streamer->GetDependenceMask(); 506 u64 dep_mask = streamer->GetDependenceMask();
497 if ((dep_mask & ~ran_mask) != 0) { 507 if ((dep_mask & ~ran_mask) != 0) {
498 next_phase |= dep_mask;
499 return; 508 return;
500 } 509 }
501 u64 index = streamer->GetId(); 510 u64 index = streamer->GetId();
@@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
503 mask &= ~(1ULL << index); 512 mask &= ~(1ULL << index);
504 streamer->PopUnsyncedQueries(); 513 streamer->PopUnsyncedQueries();
505 }); 514 });
506 ran_mask |= next_phase;
507 } 515 }
508} 516}
509 517
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
index 55f508dd1..07be421c6 100644
--- a/src/video_core/query_cache/query_cache_base.h
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -47,7 +47,7 @@ public:
47 BitField<0, 27, u32> query_id; 47 BitField<0, 27, u32> query_id;
48 u32 raw; 48 u32 raw;
49 49
50 std::pair<size_t, size_t> unpack() { 50 std::pair<size_t, size_t> unpack() const {
51 return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; 51 return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
52 } 52 }
53 }; 53 };
@@ -73,7 +73,7 @@ public:
73 } 73 }
74 } 74 }
75 75
76 static u64 BuildMask(std::span<QueryType> types) { 76 static u64 BuildMask(std::span<const QueryType> types) {
77 u64 mask = 0; 77 u64 mask = 0;
78 for (auto query_type : types) { 78 for (auto query_type : types) {
79 mask |= 1ULL << (static_cast<u64>(query_type)); 79 mask |= 1ULL << (static_cast<u64>(query_type));
@@ -160,7 +160,7 @@ protected:
160 } 160 }
161 } 161 }
162 162
163 using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; 163 using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
164 164
165 void InvalidateQuery(QueryLocation location); 165 void InvalidateQuery(QueryLocation location);
166 bool IsQueryDirty(QueryLocation location); 166 bool IsQueryDirty(QueryLocation location);
@@ -175,7 +175,7 @@ protected:
175 friend struct QueryCacheBaseImpl; 175 friend struct QueryCacheBaseImpl;
176 friend RuntimeType; 176 friend RuntimeType;
177 177
178 std::unique_ptr<struct QueryCacheBaseImpl> impl; 178 std::unique_ptr<QueryCacheBaseImpl> impl;
179}; 179};
180 180
181} // namespace VideoCommon \ No newline at end of file 181} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
index 0e9275565..e7aac955b 100644
--- a/src/video_core/query_cache/query_stream.h
+++ b/src/video_core/query_cache/query_stream.h
@@ -16,7 +16,7 @@ namespace VideoCommon {
16 16
17class StreamerInterface { 17class StreamerInterface {
18public: 18public:
19 StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} 19 explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {}
20 virtual ~StreamerInterface() = default; 20 virtual ~StreamerInterface() = default;
21 21
22 virtual QueryBase* GetQuery(size_t id) = 0; 22 virtual QueryBase* GetQuery(size_t id) = 0;
@@ -37,7 +37,7 @@ public:
37 /* Do Nothing */ 37 /* Do Nothing */
38 } 38 }
39 39
40 virtual bool HasPendingSync() { 40 virtual bool HasPendingSync() const {
41 return false; 41 return false;
42 } 42 }
43 43
@@ -52,7 +52,7 @@ public:
52 virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 52 virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
53 std::optional<u32> subreport = std::nullopt) = 0; 53 std::optional<u32> subreport = std::nullopt) = 0;
54 54
55 virtual bool HasUnsyncedQueries() { 55 virtual bool HasUnsyncedQueries() const {
56 return false; 56 return false;
57 } 57 }
58 58
@@ -71,18 +71,28 @@ public:
71 } 71 }
72 72
73 u64 GetDependenceMask() const { 73 u64 GetDependenceMask() const {
74 return dependance_mask; 74 return dependence_mask;
75 }
76
77 u64 GetDependentMask() const {
78 return dependence_mask;
75 } 79 }
76 80
77protected: 81protected:
82 void MakeDependent(StreamerInterface* depend_on) {
83 dependence_mask |= 1ULL << depend_on->id;
84 depend_on->dependent_mask |= 1ULL << id;
85 }
86
78 const size_t id; 87 const size_t id;
79 const u64 dependance_mask; 88 u64 dependence_mask;
89 u64 dependent_mask;
80}; 90};
81 91
82template <typename QueryType> 92template <typename QueryType>
83class SimpleStreamer : public StreamerInterface { 93class SimpleStreamer : public StreamerInterface {
84public: 94public:
85 SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} 95 explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
86 virtual ~SimpleStreamer() = default; 96 virtual ~SimpleStreamer() = default;
87 97
88protected: 98protected:
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 2ba7cbb0d..af1469147 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,10 +9,10 @@
9#include <utility> 9#include <utility>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/polyfill_thread.h" 11#include "common/polyfill_thread.h"
12#include "video_core/query_cache/types.h"
13#include "video_core/cache_types.h" 12#include "video_core/cache_types.h"
14#include "video_core/engines/fermi_2d.h" 13#include "video_core/engines/fermi_2d.h"
15#include "video_core/gpu.h" 14#include "video_core/gpu.h"
15#include "video_core/query_cache/types.h"
16#include "video_core/rasterizer_download_area.h" 16#include "video_core/rasterizer_download_area.h"
17 17
18namespace Tegra { 18namespace Tegra {
@@ -57,7 +57,8 @@ public:
57 virtual void ResetCounter(VideoCommon::QueryType type) = 0; 57 virtual void ResetCounter(VideoCommon::QueryType type) = 0;
58 58
59 /// Records a GPU query and caches it 59 /// Records a GPU query and caches it
60 virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; 60 virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
61 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0;
61 62
62 /// Signal an uniform buffer binding 63 /// Signal an uniform buffer binding
63 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 64 virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 57a8c4c85..23001eeb8 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -43,7 +43,8 @@ public:
43 void Clear(u32 layer_count) override; 43 void Clear(u32 layer_count) override;
44 void DispatchCompute() override; 44 void DispatchCompute() override;
45 void ResetCounter(VideoCommon::QueryType type) override; 45 void ResetCounter(VideoCommon::QueryType type) override;
46 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; 46 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
47 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
47 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 48 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
48 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 49 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
49 void FlushAll() override; 50 void FlushAll() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a975bbe75..27e2de1bf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
405void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, 405void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
406 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { 406 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
407 if (type == VideoCommon::QueryType::ZPassPixelCount64) { 407 if (type == VideoCommon::QueryType::ZPassPixelCount64) {
408 std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)
409 ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt };
410 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { 408 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
411 query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); 409 query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()});
412 } else { 410 } else {
@@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
414 } 412 }
415 return; 413 return;
416 } 414 }
417 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { 415 if (type != VideoCommon::QueryType::Payload) {
418 u64 ticks = gpu.GetTicks(); 416 payload = 1u;
419 gpu_memory->Write<u64>(gpu_addr + 8, ticks); 417 }
420 gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); 418 std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() {
421 } else { 419 if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
422 gpu_memory->Write<u32>(gpu_addr, payload); 420 u64 ticks = gpu.GetTicks();
421 memory_manager->Write<u64>(gpu_addr + 8, ticks);
422 memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload));
423 } else {
424 memory_manager->Write<u32>(gpu_addr, payload);
425 }
426 });
427 if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) {
428 SignalFence(std::move(func));
429 return;
423 } 430 }
431 func();
424} 432}
425 433
426void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, 434void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 05e048e15..ceffe1f1e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -87,7 +87,8 @@ public:
87 void Clear(u32 layer_count) override; 87 void Clear(u32 layer_count) override;
88 void DispatchCompute() override; 88 void DispatchCompute() override;
89 void ResetCounter(VideoCommon::QueryType type) override; 89 void ResetCounter(VideoCommon::QueryType type) override;
90 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; 90 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
91 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
91 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 92 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
92 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 93 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
93 void FlushAll() override; 94 void FlushAll() override;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 97cd4521d..039dc95e1 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
303 return {staging.buffer, staging.offset}; 303 return {staging.buffer, staging.offset};
304} 304}
305 305
306ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, 306ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(
307 Scheduler& scheduler_, 307 const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
308 DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) 308 ComputePassDescriptorQueue& compute_pass_descriptor_queue_)
309 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, 309 : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS,
310 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, 310 INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr,
311 RESOLVE_CONDITIONAL_RENDER_COMP_SPV), 311 RESOLVE_CONDITIONAL_RENDER_COMP_SPV),
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 14fc5ad71..336573574 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,8 +7,8 @@
7 7
8#include "video_core/fence_manager.h" 8#include "video_core/fence_manager.h"
9#include "video_core/renderer_vulkan/vk_buffer_cache.h" 9#include "video_core/renderer_vulkan/vk_buffer_cache.h"
10#include "video_core/renderer_vulkan/vk_texture_cache.h"
11#include "video_core/renderer_vulkan/vk_query_cache.h" 10#include "video_core/renderer_vulkan/vk_query_cache.h"
11#include "video_core/renderer_vulkan/vk_texture_cache.h"
12 12
13namespace Core { 13namespace Core {
14class System; 14class System;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ef891e26b..add0c6fb3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -11,11 +11,9 @@
11#include <utility> 11#include <utility>
12#include <vector> 12#include <vector>
13 13
14#include <boost/container/small_vector.hpp>
15#include <boost/icl/interval_set.hpp>
16
17#include "common/common_types.h" 14#include "common/common_types.h"
18#include "core/memory.h" 15#include "core/memory.h"
16#include "video_core/engines/draw_manager.h"
19#include "video_core/query_cache/query_cache.h" 17#include "video_core/query_cache/query_cache.h"
20#include "video_core/renderer_vulkan/vk_buffer_cache.h" 18#include "video_core/renderer_vulkan/vk_buffer_cache.h"
21#include "video_core/renderer_vulkan/vk_compute_pass.h" 19#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -30,6 +28,7 @@
30 28
31namespace Vulkan { 29namespace Vulkan {
32 30
31using Tegra::Engines::Maxwell3D;
33using VideoCommon::QueryType; 32using VideoCommon::QueryType;
34 33
35namespace { 34namespace {
@@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase {
37public: 36public:
38 static constexpr size_t BANK_SIZE = 256; 37 static constexpr size_t BANK_SIZE = 256;
39 static constexpr size_t QUERY_SIZE = 8; 38 static constexpr size_t QUERY_SIZE = 8;
40 SamplesQueryBank(const Device& device_, size_t index_) 39 explicit SamplesQueryBank(const Device& device_, size_t index_)
41 : BankBase(BANK_SIZE), device{device_}, index{index_} { 40 : BankBase(BANK_SIZE), device{device_}, index{index_} {
42 const auto& dev = device.GetLogical(); 41 const auto& dev = device.GetLogical();
43 query_pool = dev.CreateQueryPool({ 42 query_pool = dev.CreateQueryPool({
@@ -109,18 +108,19 @@ struct HostSyncValues {
109 static constexpr bool GeneratesBaseBuffer = false; 108 static constexpr bool GeneratesBaseBuffer = false;
110}; 109};
111 110
112template <typename Traits>
113class SamplesStreamer : public BaseStreamer { 111class SamplesStreamer : public BaseStreamer {
114public: 112public:
115 SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, 113 explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
116 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) 114 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
117 : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, 115 : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
118 memory_allocator{memory_allocator_} { 116 memory_allocator{memory_allocator_} {
119 BuildResolveBuffer(); 117 BuildResolveBuffer();
120 current_bank = nullptr; 118 current_bank = nullptr;
121 current_query = nullptr; 119 current_query = nullptr;
122 } 120 }
123 121
122 ~SamplesStreamer() = default;
123
124 void StartCounter() override { 124 void StartCounter() override {
125 if (has_started) { 125 if (has_started) {
126 return; 126 return;
@@ -157,7 +157,7 @@ public:
157 PauseCounter(); 157 PauseCounter();
158 } 158 }
159 159
160 bool HasPendingSync() override { 160 bool HasPendingSync() const override {
161 return !pending_sync.empty(); 161 return !pending_sync.empty();
162 } 162 }
163 163
@@ -198,7 +198,7 @@ public:
198 } 198 }
199 resolve_slots_remaining = resolve_slots; 199 resolve_slots_remaining = resolve_slots;
200 sync_values_stash.emplace_back(); 200 sync_values_stash.emplace_back();
201 sync_values = sync_values = &sync_values_stash.back(); 201 sync_values = &sync_values_stash.back();
202 sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); 202 sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE);
203 } 203 }
204 resolve_slots_remaining--; 204 resolve_slots_remaining--;
@@ -207,6 +207,7 @@ public:
207 const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * 207 const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE *
208 (resolve_slots - resolve_slots_remaining - 1); 208 (resolve_slots - resolve_slots_remaining - 1);
209 VkQueryPool query_pool = bank->GetInnerPool(); 209 VkQueryPool query_pool = bank->GetInnerPool();
210 scheduler.RequestOutsideRenderPassOperationContext();
210 scheduler.Record([start, amount, base_offset, query_pool, 211 scheduler.Record([start, amount, base_offset, query_pool,
211 buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { 212 buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) {
212 size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; 213 size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE;
@@ -284,7 +285,7 @@ public:
284 return index; 285 return index;
285 } 286 }
286 287
287 bool HasUnsyncedQueries() override { 288 bool HasUnsyncedQueries() const override {
288 return !pending_flush_queries.empty(); 289 return !pending_flush_queries.empty();
289 } 290 }
290 291
@@ -348,8 +349,8 @@ private:
348 for (auto q : queries) { 349 for (auto q : queries) {
349 auto* query = GetQuery(q); 350 auto* query = GetQuery(q);
350 ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { 351 ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) {
351 auto id = bank->GetIndex(); 352 auto id_ = bank->GetIndex();
352 auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), 353 auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(),
353 std::numeric_limits<size_t>::min()); 354 std::numeric_limits<size_t>::min());
354 auto& current_pair = pair.first->second; 355 auto& current_pair = pair.first->second;
355 current_pair.first = std::min(current_pair.first, start); 356 current_pair.first = std::min(current_pair.first, start);
@@ -434,13 +435,14 @@ private:
434 .pNext = nullptr, 435 .pNext = nullptr,
435 .flags = 0, 436 .flags = 0,
436 .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, 437 .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots,
437 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 438 .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
439 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
438 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 440 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
439 .queueFamilyIndexCount = 0, 441 .queueFamilyIndexCount = 0,
440 .pQueueFamilyIndices = nullptr, 442 .pQueueFamilyIndices = nullptr,
441 }; 443 };
442 resolve_buffers.emplace_back( 444 resolve_buffers.emplace_back(
443 std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); 445 memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal));
444 } 446 }
445 447
446 static constexpr size_t resolve_slots = 8; 448 static constexpr size_t resolve_slots = 8;
@@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase {
476public: 478public:
477 static constexpr size_t BANK_SIZE = 1024; 479 static constexpr size_t BANK_SIZE = 1024;
478 static constexpr size_t QUERY_SIZE = 4; 480 static constexpr size_t QUERY_SIZE = 4;
479 TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) 481 explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator,
482 size_t index_)
480 : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { 483 : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} {
481 const VkBufferCreateInfo buffer_ci = { 484 const VkBufferCreateInfo buffer_ci = {
482 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 485 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@@ -525,22 +528,21 @@ private:
525 vk::Buffer buffer; 528 vk::Buffer buffer;
526}; 529};
527 530
528template <typename Traits>
529class PrimitivesSucceededStreamer; 531class PrimitivesSucceededStreamer;
530 532
531template <typename Traits>
532class TFBCounterStreamer : public BaseStreamer { 533class TFBCounterStreamer : public BaseStreamer {
533public: 534public:
534 TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, 535 explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_,
535 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, 536 Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
536 StagingBufferPool& staging_pool_) 537 StagingBufferPool& staging_pool_)
537 : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, 538 : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_},
538 memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { 539 memory_allocator{memory_allocator_}, staging_pool{staging_pool_} {
539 buffers_count = 0; 540 buffers_count = 0;
540 current_bank = nullptr; 541 current_bank = nullptr;
541 counter_buffers.fill(VK_NULL_HANDLE); 542 counter_buffers.fill(VK_NULL_HANDLE);
542 offsets.fill(0); 543 offsets.fill(0);
543 last_queries.fill(0); 544 last_queries.fill(0);
545 last_queries_stride.fill(1);
544 const VkBufferCreateInfo buffer_ci = { 546 const VkBufferCreateInfo buffer_ci = {
545 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 547 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
546 .pNext = nullptr, 548 .pNext = nullptr,
@@ -564,6 +566,8 @@ public:
564 } 566 }
565 } 567 }
566 568
569 ~TFBCounterStreamer() = default;
570
567 void StartCounter() override { 571 void StartCounter() override {
568 FlushBeginTFB(); 572 FlushBeginTFB();
569 has_started = true; 573 has_started = true;
@@ -581,15 +585,15 @@ public:
581 if (has_flushed_end_pending) { 585 if (has_flushed_end_pending) {
582 FlushEndTFB(); 586 FlushEndTFB();
583 } 587 }
584 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { 588 runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
585 if (regs.transform_feedback_enabled == 0) { 589 if (maxwell3d.regs.transform_feedback_enabled == 0) {
586 streams_mask = 0; 590 streams_mask = 0;
587 has_started = false; 591 has_started = false;
588 } 592 }
589 }); 593 });
590 } 594 }
591 595
592 bool HasPendingSync() override { 596 bool HasPendingSync() const override {
593 return !pending_sync.empty(); 597 return !pending_sync.empty();
594 } 598 }
595 599
@@ -650,14 +654,19 @@ public:
650 return index; 654 return index;
651 } 655 }
652 656
653 std::optional<VAddr> GetLastQueryStream(size_t stream) { 657 std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) {
654 if (last_queries[stream] != 0) { 658 if (last_queries[stream] != 0) {
655 return {last_queries[stream]}; 659 std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]);
660 return result;
656 } 661 }
657 return std::nullopt; 662 return std::nullopt;
658 } 663 }
659 664
660 bool HasUnsyncedQueries() override { 665 Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const {
666 return out_topology;
667 }
668
669 bool HasUnsyncedQueries() const override {
661 return !pending_flush_queries.empty(); 670 return !pending_flush_queries.empty();
662 } 671 }
663 672
@@ -762,15 +771,17 @@ private:
762 771
763 void UpdateBuffers() { 772 void UpdateBuffers() {
764 last_queries.fill(0); 773 last_queries.fill(0);
765 runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { 774 last_queries_stride.fill(1);
775 runtime.View3DRegs([this](Maxwell3D& maxwell3d) {
766 buffers_count = 0; 776 buffers_count = 0;
767 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; 777 out_topology = maxwell3d.draw_manager->GetDrawState().topology;
768 i++) { 778 for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
769 const auto& tf = regs.transform_feedback; 779 const auto& tf = maxwell3d.regs.transform_feedback;
770 if (tf.buffers[i].enable == 0) { 780 if (tf.buffers[i].enable == 0) {
771 continue; 781 continue;
772 } 782 }
773 const size_t stream = tf.controls[i].stream; 783 const size_t stream = tf.controls[i].stream;
784 last_queries_stride[stream] = tf.controls[i].stride;
774 streams_mask |= 1ULL << stream; 785 streams_mask |= 1ULL << stream;
775 buffers_count = std::max<size_t>(buffers_count, stream + 1); 786 buffers_count = std::max<size_t>(buffers_count, stream + 1);
776 } 787 }
@@ -785,7 +796,8 @@ private:
785 }); 796 });
786 current_bank = &bank_pool.GetBank(current_bank_id); 797 current_bank = &bank_pool.GetBank(current_bank_id);
787 } 798 }
788 auto [dont_care, slot] = current_bank->Reserve(); 799 auto [dont_care, other] = current_bank->Reserve();
800 const size_t slot = other; // workaround to compile bug.
789 current_bank->AddReference(); 801 current_bank->AddReference();
790 802
791 static constexpr VkMemoryBarrier READ_BARRIER{ 803 static constexpr VkMemoryBarrier READ_BARRIER{
@@ -818,11 +830,9 @@ private:
818 return {current_bank_id, slot}; 830 return {current_bank_id, slot};
819 } 831 }
820 832
821 template <typename Traits>
822 friend class PrimitivesSucceededStreamer; 833 friend class PrimitivesSucceededStreamer;
823 834
824 static constexpr size_t NUM_STREAMS = 4; 835 static constexpr size_t NUM_STREAMS = 4;
825 static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
826 836
827 QueryCacheRuntime& runtime; 837 QueryCacheRuntime& runtime;
828 const Device& device; 838 const Device& device;
@@ -851,6 +861,8 @@ private:
851 std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; 861 std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
852 std::array<VkDeviceSize, NUM_STREAMS> offsets{}; 862 std::array<VkDeviceSize, NUM_STREAMS> offsets{};
853 std::array<VAddr, NUM_STREAMS> last_queries; 863 std::array<VAddr, NUM_STREAMS> last_queries;
864 std::array<size_t, NUM_STREAMS> last_queries_stride;
865 Maxwell3D::Regs::PrimitiveTopology out_topology;
854 u64 streams_mask; 866 u64 streams_mask;
855}; 867};
856 868
@@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase {
858public: 870public:
859 // Default constructor 871 // Default constructor
860 PrimitivesQueryBase() 872 PrimitivesQueryBase()
861 : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, 873 : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
862 dependant_index{}, dependant_manage{} {}
863 874
864 // Parameterized constructor 875 // Parameterized constructor
865 PrimitivesQueryBase(bool is_long, VAddr address) 876 PrimitivesQueryBase(bool has_timestamp, VAddr address)
866 : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, 877 : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
867 dependant_index{}, dependant_manage{} { 878 if (has_timestamp) {
868 if (is_long) {
869 flags |= VideoCommon::QueryFlagBits::HasTimestamp; 879 flags |= VideoCommon::QueryFlagBits::HasTimestamp;
870 } 880 }
871 } 881 }
872 882
873 u64 stride; 883 u64 stride{};
874 VAddr dependant_address; 884 VAddr dependant_address{};
875 size_t dependant_index; 885 Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points};
876 bool dependant_manage; 886 size_t dependant_index{};
887 bool dependant_manage{};
877}; 888};
878 889
879template <typename Traits>
880class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { 890class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
881public: 891public:
882 PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, 892 explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_,
883 TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_) 893 TFBCounterStreamer& tfb_streamer_,
884 : VideoCommon::SimpleStreamer<PrimitivesQueryBase>( 894 Core::Memory::Memory& cpu_memory_)
885 id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)), 895 : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_},
886 runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} 896 tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {
897 MakeDependent(&tfb_streamer);
898 }
899
900 ~PrimitivesSucceededStreamer() = default;
887 901
888 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, 902 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
889 std::optional<u32> subreport_) override { 903 std::optional<u32> subreport_) override {
@@ -901,8 +915,11 @@ public:
901 const size_t subreport = static_cast<size_t>(*subreport_); 915 const size_t subreport = static_cast<size_t>(*subreport_);
902 auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); 916 auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
903 bool must_manage_dependance = false; 917 bool must_manage_dependance = false;
918 new_query->topology = tfb_streamer.GetOutputTopology();
904 if (dependant_address_opt) { 919 if (dependant_address_opt) {
905 new_query->dependant_address = *dependant_address_opt; 920 auto [dep_address, stride] = *dependant_address_opt;
921 new_query->dependant_address = dep_address;
922 new_query->stride = stride;
906 } else { 923 } else {
907 new_query->dependant_index = 924 new_query->dependant_index =
908 tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); 925 tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
@@ -917,25 +934,28 @@ public:
917 } 934 }
918 return index; 935 return index;
919 } 936 }
937 new_query->stride = 1;
938 runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) {
939 for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) {
940 const auto& tf = maxwell3d.regs.transform_feedback;
941 if (tf.buffers[i].enable == 0) {
942 continue;
943 }
944 if (tf.controls[i].stream != subreport) {
945 continue;
946 }
947 new_query->stride = tf.controls[i].stride;
948 break;
949 }
950 });
920 } 951 }
921 952
922 new_query->dependant_manage = must_manage_dependance; 953 new_query->dependant_manage = must_manage_dependance;
923 runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
924 for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
925 i++) {
926 const auto& tf = regs.transform_feedback;
927 if (tf.controls[i].stream != subreport) {
928 continue;
929 }
930 new_query->stride = tf.controls[i].stride;
931 break;
932 }
933 });
934 pending_flush_queries.push_back(index); 954 pending_flush_queries.push_back(index);
935 return index; 955 return index;
936 } 956 }
937 957
938 bool HasUnsyncedQueries() override { 958 bool HasUnsyncedQueries() const override {
939 return !pending_flush_queries.empty(); 959 return !pending_flush_queries.empty();
940 } 960 }
941 961
@@ -960,22 +980,49 @@ public:
960 } 980 }
961 981
962 query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; 982 query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
983 u64 num_vertices = 0;
963 if (query->dependant_manage) { 984 if (query->dependant_manage) {
964 auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); 985 auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
965 query->value = dependant_query->value / query->stride; 986 num_vertices = dependant_query->value / query->stride;
966 tfb_streamer.Free(query->dependant_index); 987 tfb_streamer.Free(query->dependant_index);
967 } else { 988 } else {
968 u8* pointer = cpu_memory.GetPointer(query->dependant_address); 989 u8* pointer = cpu_memory.GetPointer(query->dependant_address);
969 u32 result; 990 u32 result;
970 std::memcpy(&result, pointer, sizeof(u32)); 991 std::memcpy(&result, pointer, sizeof(u32));
971 query->value = static_cast<u64>(result) / query->stride; 992 num_vertices = static_cast<u64>(result) / query->stride;
972 } 993 }
994 query->value = [&]() -> u64 {
995 switch (query->topology) {
996 case Maxwell3D::Regs::PrimitiveTopology::Points:
997 return num_vertices;
998 case Maxwell3D::Regs::PrimitiveTopology::Lines:
999 return num_vertices / 2;
1000 case Maxwell3D::Regs::PrimitiveTopology::LineLoop:
1001 return (num_vertices / 2) + 1;
1002 case Maxwell3D::Regs::PrimitiveTopology::LineStrip:
1003 return num_vertices - 1;
1004 case Maxwell3D::Regs::PrimitiveTopology::Patches:
1005 case Maxwell3D::Regs::PrimitiveTopology::Triangles:
1006 case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
1007 return num_vertices / 3;
1008 case Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
1009 case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
1010 case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
1011 return num_vertices - 2;
1012 case Maxwell3D::Regs::PrimitiveTopology::Quads:
1013 return num_vertices / 4;
1014 case Maxwell3D::Regs::PrimitiveTopology::Polygon:
1015 return 1U;
1016 default:
1017 return num_vertices;
1018 }
1019 }();
973 } 1020 }
974 } 1021 }
975 1022
976private: 1023private:
977 QueryCacheRuntime& runtime; 1024 QueryCacheRuntime& runtime;
978 TFBCounterStreamer<QueryCacheParams>& tfb_streamer; 1025 TFBCounterStreamer& tfb_streamer;
979 Core::Memory::Memory& cpu_memory; 1026 Core::Memory::Memory& cpu_memory;
980 1027
981 // syncing queue 1028 // syncing queue
@@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl {
1005 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, 1052 tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
1006 scheduler, memory_allocator, staging_pool), 1053 scheduler, memory_allocator, staging_pool),
1007 primitives_succeeded_streamer( 1054 primitives_succeeded_streamer(
1008 static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), 1055 static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer,
1056 cpu_memory_),
1057 primitives_needed_minus_suceeded_streamer(
1058 static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u),
1009 hcr_setup{}, hcr_is_set{}, is_hcr_running{} { 1059 hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
1010 1060
1011 hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; 1061 hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl {
1040 1090
1041 // Streamers 1091 // Streamers
1042 VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; 1092 VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
1043 SamplesStreamer<QueryCacheParams> sample_streamer; 1093 SamplesStreamer sample_streamer;
1044 TFBCounterStreamer<QueryCacheParams> tfb_streamer; 1094 TFBCounterStreamer tfb_streamer;
1045 PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer; 1095 PrimitivesSucceededStreamer primitives_succeeded_streamer;
1096 VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer;
1046 1097
1047 std::vector<std::pair<VAddr, VAddr>> little_cache; 1098 std::vector<std::pair<VAddr, VAddr>> little_cache;
1048 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; 1099 std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
@@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl {
1059 bool is_hcr_running; 1110 bool is_hcr_running;
1060 1111
1061 // maxwell3d 1112 // maxwell3d
1062 Tegra::Engines::Maxwell3D* maxwell3d; 1113 Maxwell3D* maxwell3d;
1063}; 1114};
1064 1115
1065QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, 1116QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
@@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
1074 staging_pool_, compute_pass_descriptor_queue, descriptor_pool); 1125 staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
1075} 1126}
1076 1127
1077void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { 1128void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) {
1078 impl->maxwell3d = maxwell3d; 1129 impl->maxwell3d = maxwell3d;
1079} 1130}
1080 1131
1081template <typename Func> 1132template <typename Func>
1082void QueryCacheRuntime::View3DRegs(Func&& func) { 1133void QueryCacheRuntime::View3DRegs(Func&& func) {
1083 func(impl->maxwell3d->regs); 1134 func(*impl->maxwell3d);
1084} 1135}
1085 1136
1086void QueryCacheRuntime::EndHostConditionalRendering() { 1137void QueryCacheRuntime::EndHostConditionalRendering() {
@@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
1240 return &impl->sample_streamer; 1291 return &impl->sample_streamer;
1241 case QueryType::StreamingByteCount: 1292 case QueryType::StreamingByteCount:
1242 return &impl->tfb_streamer; 1293 return &impl->tfb_streamer;
1294 case QueryType::StreamingPrimitivesNeeded:
1295 case QueryType::VtgPrimitivesOut:
1243 case QueryType::StreamingPrimitivesSucceeded: 1296 case QueryType::StreamingPrimitivesSucceeded:
1244 return &impl->primitives_succeeded_streamer; 1297 return &impl->primitives_succeeded_streamer;
1298 case QueryType::StreamingPrimitivesNeededMinusSucceeded:
1299 return &impl->primitives_needed_minus_suceeded_streamer;
1245 default: 1300 default:
1246 return nullptr; 1301 return nullptr;
1247 } 1302 }
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 9ad2929d7..e9a1ea169 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -49,7 +49,8 @@ public:
49 bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); 49 bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty);
50 50
51 bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, 51 bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1,
52 VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); 52 VideoCommon::LookupData object_2, bool qc_dirty,
53 bool equal_check);
53 54
54 VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); 55 VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type);
55 56
@@ -66,7 +67,7 @@ private:
66}; 67};
67 68
68struct QueryCacheParams { 69struct QueryCacheParams {
69 using RuntimeType = Vulkan::QueryCacheRuntime; 70 using RuntimeType = typename Vulkan::QueryCacheRuntime;
70}; 71};
71 72
72using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; 73using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e8862ba04..c7ce7c312 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
194 194
195 query_cache.NotifySegment(true); 195 query_cache.NotifySegment(true);
196 196
197#if ANDROID
198 if (Settings::IsGPULevelHigh()) {
199 // This is problematic on Android, disable on GPU Normal.
200 // query_cache.UpdateCounters();
201 }
202#else
203 // query_cache.UpdateCounters();
204#endif
205
206 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; 197 GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
207 if (!pipeline) { 198 if (!pipeline) {
208 return; 199 return;
@@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() {
294 285
295 query_cache.NotifySegment(true); 286 query_cache.NotifySegment(true);
296 287
297#if ANDROID
298 if (Settings::IsGPULevelHigh()) {
299 // This is problematic on Android, disable on GPU Normal.
300 // query_cache.UpdateCounters();
301 }
302#else
303 // query_cache.UpdateCounters();
304#endif
305
306 texture_cache.SynchronizeGraphicsDescriptors(); 288 texture_cache.SynchronizeGraphicsDescriptors();
307 texture_cache.UpdateRenderTargets(false); 289 texture_cache.UpdateRenderTargets(false);
308 290
@@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
332 FlushWork(); 314 FlushWork();
333 gpu_memory->FlushCaching(); 315 gpu_memory->FlushCaching();
334 316
335#if ANDROID
336 if (Settings::IsGPULevelHigh()) {
337 // This is problematic on Android, disable on GPU Normal.
338 // query_cache.UpdateCounters();
339 }
340#else
341 // query_cache.UpdateCounters();
342#endif
343
344 query_cache.NotifySegment(true); 317 query_cache.NotifySegment(true);
345 query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, 318 query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
346 maxwell3d->regs.zpass_pixel_count_enable); 319 maxwell3d->regs.zpass_pixel_count_enable);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index ffd44c68d..ad069556c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -85,7 +85,8 @@ public:
85 void Clear(u32 layer_count) override; 85 void Clear(u32 layer_count) override;
86 void DispatchCompute() override; 86 void DispatchCompute() override;
87 void ResetCounter(VideoCommon::QueryType type) override; 87 void ResetCounter(VideoCommon::QueryType type) override;
88 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; 88 void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
89 VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override;
89 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; 90 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
90 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; 91 void DisableGraphicsUniformBuffer(size_t stage, u32 index) override;
91 void FlushAll() override; 92 void FlushAll() override;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index c87e5fb07..da03803aa 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -15,9 +15,13 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/polyfill_thread.h" 16#include "common/polyfill_thread.h"
17#include "video_core/renderer_vulkan/vk_master_semaphore.h" 17#include "video_core/renderer_vulkan/vk_master_semaphore.h"
18#include "video_core/renderer_vulkan/vk_query_cache.h"
19#include "video_core/vulkan_common/vulkan_wrapper.h" 18#include "video_core/vulkan_common/vulkan_wrapper.h"
20 19
20namespace VideoCommon {
21template <typename Trait>
22class QueryCacheBase;
23}
24
21namespace Vulkan { 25namespace Vulkan {
22 26
23class CommandPool; 27class CommandPool;
@@ -26,6 +30,8 @@ class Framebuffer;
26class GraphicsPipeline; 30class GraphicsPipeline;
27class StateTracker; 31class StateTracker;
28 32
33struct QueryCacheParams;
34
29/// The scheduler abstracts command buffer and fence management with an interface that's able to do 35/// The scheduler abstracts command buffer and fence management with an interface that's able to do
30/// OpenGL-like operations on Vulkan command buffers. 36/// OpenGL-like operations on Vulkan command buffers.
31class Scheduler { 37class Scheduler {
@@ -63,7 +69,7 @@ public:
63 void InvalidateState(); 69 void InvalidateState();
64 70
65 /// Assigns the query cache. 71 /// Assigns the query cache.
66 void SetQueryCache(QueryCache& query_cache_) { 72 void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) {
67 query_cache = &query_cache_; 73 query_cache = &query_cache_;
68 } 74 }
69 75
@@ -219,7 +225,7 @@ private:
219 std::unique_ptr<MasterSemaphore> master_semaphore; 225 std::unique_ptr<MasterSemaphore> master_semaphore;
220 std::unique_ptr<CommandPool> command_pool; 226 std::unique_ptr<CommandPool> command_pool;
221 227
222 QueryCache* query_cache = nullptr; 228 VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
223 229
224 vk::CommandBuffer current_cmdbuf; 230 vk::CommandBuffer current_cmdbuf;
225 231