summaryrefslogtreecommitdiff
path: root/src/video_core/query_cache
diff options
context:
space:
mode:
authorGravatar liamwhite2023-09-25 09:18:29 -0400
committerGravatar GitHub2023-09-25 09:18:29 -0400
commit854457a392b6d38168f7f9d19d1fa8c43fad653c (patch)
tree3bc1007b5776f1ce82c057875609105de0a1ca44 /src/video_core/query_cache
parentMerge pull request #11569 from german77/lle_applet (diff)
parentQuery Cache: Fix Prefix Sums (diff)
downloadyuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.gz
yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.xz
yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.zip
Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache.
Diffstat (limited to 'src/video_core/query_cache')
-rw-r--r--src/video_core/query_cache/bank_base.h104
-rw-r--r--src/video_core/query_cache/query_base.h70
-rw-r--r--src/video_core/query_cache/query_cache.h580
-rw-r--r--src/video_core/query_cache/query_cache_base.h181
-rw-r--r--src/video_core/query_cache/query_stream.h149
-rw-r--r--src/video_core/query_cache/types.h74
6 files changed, 1158 insertions, 0 deletions
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h
new file mode 100644
index 000000000..420927091
--- /dev/null
+++ b/src/video_core/query_cache/bank_base.h
@@ -0,0 +1,104 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <deque>
8#include <utility>
9
10#include "common/common_types.h"
11
12namespace VideoCommon {
13
14class BankBase {
15protected:
16 const size_t base_bank_size{};
17 size_t bank_size{};
18 std::atomic<size_t> references{};
19 size_t current_slot{};
20
21public:
22 explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {}
23
24 virtual ~BankBase() = default;
25
26 virtual std::pair<bool, size_t> Reserve() {
27 if (IsClosed()) {
28 return {false, bank_size};
29 }
30 const size_t result = current_slot++;
31 return {true, result};
32 }
33
34 virtual void Reset() {
35 current_slot = 0;
36 references = 0;
37 bank_size = base_bank_size;
38 }
39
40 size_t Size() const {
41 return bank_size;
42 }
43
44 void AddReference(size_t how_many = 1) {
45 references.fetch_add(how_many, std::memory_order_relaxed);
46 }
47
48 void CloseReference(size_t how_many = 1) {
49 if (how_many > references.load(std::memory_order_relaxed)) {
50 UNREACHABLE();
51 }
52 references.fetch_sub(how_many, std::memory_order_relaxed);
53 }
54
55 void Close() {
56 bank_size = current_slot;
57 }
58
59 bool IsClosed() const {
60 return current_slot >= bank_size;
61 }
62
63 bool IsDead() const {
64 return IsClosed() && references == 0;
65 }
66};
67
/// Pool of banks that recycles dead banks before growing.
///
/// `bank_pool` owns every bank ever built; `bank_indices` is the recycle queue, oldest bank
/// first. Only the bank at the front of the queue is considered for reuse.
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;
    std::deque<size_t> bank_indices;

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserves a bank and returns its index in the pool.
    ///
    /// If the oldest tracked bank reports IsDead() it is Reset() and handed out again;
    /// otherwise `builder(bank_pool, new_index)` is invoked and is expected to append a new
    /// bank to the pool.
    /// @param builder Callable taking (std::deque<BankType>&, size_t index).
    /// @return Index usable with GetBank().
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t reused_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[reused_index].Reset();
            // Re-queue the bank behind the others; without this it would drop out of the
            // recycle queue for good and the pool would only ever grow afterwards.
            bank_indices.push_back(reused_index);
            return reused_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool
    size_t BankCount() const {
        return bank_pool.size();
    }
};
103
104} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
new file mode 100644
index 000000000..1d786b3a7
--- /dev/null
+++ b/src/video_core/query_cache/query_base.h
@@ -0,0 +1,70 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11enum class QueryFlagBits : u32 {
12 HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
13 IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host
14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host
15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query
17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query
18 IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
20 IsFence = 1 << 8, ///< Indicates the query is a fence.
21};
22DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23
24class QueryBase {
25public:
26 VAddr guest_address{};
27 QueryFlagBits flags{};
28 u64 value{};
29
30protected:
31 // Default constructor
32 QueryBase() = default;
33
34 // Parameterized constructor
35 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
36 : guest_address(address), flags(flags_), value{value_} {}
37};
38
39class GuestQuery : public QueryBase {
40public:
41 // Parameterized constructor
42 GuestQuery(bool isLong, VAddr address, u64 queryValue)
43 : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) {
44 if (isLong) {
45 flags |= QueryFlagBits::HasTimestamp;
46 }
47 }
48};
49
50class HostQueryBase : public QueryBase {
51public:
52 // Default constructor
53 HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {}
54
55 // Parameterized constructor
56 HostQueryBase(bool has_timestamp, VAddr address)
57 : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
58 start_slot{}, size_slots{} {
59 if (has_timestamp) {
60 flags |= QueryFlagBits::HasTimestamp;
61 }
62 }
63
64 u32 start_bank_id{};
65 u32 size_banks{};
66 size_t start_slot{};
67 size_t size_slots{};
68};
69
70} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
new file mode 100644
index 000000000..78b42b518
--- /dev/null
+++ b/src/video_core/query_cache/query_cache.h
@@ -0,0 +1,580 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <array>
7#include <deque>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <utility>
12
13#include "common/assert.h"
14#include "common/common_types.h"
15#include "common/logging/log.h"
16#include "common/scope_exit.h"
17#include "common/settings.h"
18#include "core/memory.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h"
21#include "video_core/memory_manager.h"
22#include "video_core/query_cache/bank_base.h"
23#include "video_core/query_cache/query_base.h"
24#include "video_core/query_cache/query_cache_base.h"
25#include "video_core/query_cache/query_stream.h"
26#include "video_core/query_cache/types.h"
27
28namespace VideoCommon {
29
30using Maxwell = Tegra::Engines::Maxwell3D;
31
32struct SyncValuesStruct {
33 VAddr address;
34 u64 value;
35 u64 size;
36
37 static constexpr bool GeneratesBaseBuffer = true;
38};
39
40template <typename Traits>
41class GuestStreamer : public SimpleStreamer<GuestQuery> {
42public:
43 using RuntimeType = typename Traits::RuntimeType;
44
45 GuestStreamer(size_t id_, RuntimeType& runtime_)
46 : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {}
47
48 virtual ~GuestStreamer() = default;
49
50 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
51 std::optional<u32> subreport = std::nullopt) override {
52 auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value));
53 pending_sync.push_back(new_id);
54 return new_id;
55 }
56
57 bool HasPendingSync() const override {
58 return !pending_sync.empty();
59 }
60
61 void SyncWrites() override {
62 if (pending_sync.empty()) {
63 return;
64 }
65 std::vector<SyncValuesStruct> sync_values;
66 sync_values.reserve(pending_sync.size());
67 for (size_t pending_id : pending_sync) {
68 auto& query = slot_queries[pending_id];
69 if (True(query.flags & QueryFlagBits::IsRewritten) ||
70 True(query.flags & QueryFlagBits::IsInvalidated)) {
71 continue;
72 }
73 query.flags |= QueryFlagBits::IsHostSynced;
74 sync_values.emplace_back(SyncValuesStruct{
75 .address = query.guest_address,
76 .value = query.value,
77 .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)});
78 }
79 pending_sync.clear();
80 if (sync_values.size() > 0) {
81 runtime.template SyncValues<SyncValuesStruct>(sync_values);
82 }
83 }
84
85private:
86 RuntimeType& runtime;
87 std::deque<size_t> pending_sync;
88};
89
90template <typename Traits>
91class StubStreamer : public GuestStreamer<Traits> {
92public:
93 using RuntimeType = typename Traits::RuntimeType;
94
95 StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_)
96 : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {}
97
98 ~StubStreamer() override = default;
99
100 size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
101 std::optional<u32> subreport = std::nullopt) override {
102 size_t new_id =
103 GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport);
104 return new_id;
105 }
106
107private:
108 u32 stub_value;
109};
110
111template <typename Traits>
112struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
113 using RuntimeType = typename Traits::RuntimeType;
114
115 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
116 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
117 : owner{owner_}, rasterizer{rasterizer_},
118 cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
119 streamer_mask = 0;
120 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
121 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
122 if (streamers[i]) {
123 streamer_mask |= 1ULL << streamers[i]->GetId();
124 }
125 }
126 }
127
128 template <typename Func>
129 void ForEachStreamerIn(u64 mask, Func&& func) {
130 static constexpr bool RETURNS_BOOL =
131 std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>;
132 while (mask != 0) {
133 size_t position = std::countr_zero(mask);
134 mask &= ~(1ULL << position);
135 if constexpr (RETURNS_BOOL) {
136 if (func(streamers[position])) {
137 return;
138 }
139 } else {
140 func(streamers[position]);
141 }
142 }
143 }
144
145 template <typename Func>
146 void ForEachStreamer(Func&& func) {
147 ForEachStreamerIn(streamer_mask, func);
148 }
149
150 QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) {
151 size_t which_stream = location.stream_id.Value();
152 auto* streamer = streamers[which_stream];
153 if (!streamer) {
154 return nullptr;
155 }
156 return streamer->GetQuery(location.query_id.Value());
157 }
158
159 QueryCacheBase<Traits>* owner;
160 VideoCore::RasterizerInterface& rasterizer;
161 Core::Memory::Memory& cpu_memory;
162 RuntimeType& runtime;
163 Tegra::GPU& gpu;
164 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
165 u64 streamer_mask;
166 std::mutex flush_guard;
167 std::deque<u64> flushes_pending;
168 std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
169};
170
171template <typename Traits>
172QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
173 VideoCore::RasterizerInterface& rasterizer_,
174 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
175 : cached_queries{} {
176 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
177 this, rasterizer_, cpu_memory_, runtime_, gpu_);
178}
179
180template <typename Traits>
181QueryCacheBase<Traits>::~QueryCacheBase() = default;
182
183template <typename Traits>
184void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) {
185 size_t index = static_cast<size_t>(counter_type);
186 StreamerInterface* streamer = impl->streamers[index];
187 if (!streamer) [[unlikely]] {
188 UNREACHABLE();
189 return;
190 }
191 if (is_enabled) {
192 streamer->StartCounter();
193 } else {
194 streamer->PauseCounter();
195 }
196}
197
198template <typename Traits>
199void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
200 size_t index = static_cast<size_t>(counter_type);
201 StreamerInterface* streamer = impl->streamers[index];
202 if (!streamer) [[unlikely]] {
203 UNREACHABLE();
204 return;
205 }
206 streamer->CloseCounter();
207}
208
209template <typename Traits>
210void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
211 size_t index = static_cast<size_t>(counter_type);
212 StreamerInterface* streamer = impl->streamers[index];
213 if (!streamer) [[unlikely]] {
214 UNIMPLEMENTED();
215 return;
216 }
217 streamer->ResetCounter();
218}
219
220template <typename Traits>
221void QueryCacheBase<Traits>::BindToChannel(s32 id) {
222 VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id);
223 impl->runtime.Bind3DEngine(maxwell3d);
224}
225
226template <typename Traits>
227void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
228 QueryPropertiesFlags flags, u32 payload, u32 subreport) {
229 const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout);
230 const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
231 size_t streamer_id = static_cast<size_t>(counter_type);
232 auto* streamer = impl->streamers[streamer_id];
233 if (streamer == nullptr) [[unlikely]] {
234 counter_type = QueryType::Payload;
235 payload = 1U;
236 streamer_id = static_cast<size_t>(counter_type);
237 streamer = impl->streamers[streamer_id];
238 }
239 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
240 if (!cpu_addr_opt) [[unlikely]] {
241 return;
242 }
243 VAddr cpu_addr = *cpu_addr_opt;
244 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
245 auto* query = streamer->GetQuery(new_query_id);
246 if (is_fence) {
247 query->flags |= QueryFlagBits::IsFence;
248 }
249 QueryLocation query_location{};
250 query_location.stream_id.Assign(static_cast<u32>(streamer_id));
251 query_location.query_id.Assign(static_cast<u32>(new_query_id));
252 const auto gen_caching_indexing = [](VAddr cur_addr) {
253 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
254 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
255 };
256 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
257 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
258 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
259
260 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
261 pointer, pointer_timestamp] {
262 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
263 if (!is_synced) [[likely]] {
264 impl->pending_unregister.push_back(query_location);
265 }
266 return;
267 }
268 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
269 UNREACHABLE();
270 return;
271 }
272 query_base->value += streamer->GetAmmendValue();
273 streamer->SetAccumulationValue(query_base->value);
274 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
275 u64 timestamp = impl->gpu.GetTicks();
276 std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
277 std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
278 } else {
279 u32 value = static_cast<u32>(query_base->value);
280 std::memcpy(pointer, &value, sizeof(value));
281 }
282 if (!is_synced) [[likely]] {
283 impl->pending_unregister.push_back(query_location);
284 }
285 });
286 if (is_fence) {
287 impl->rasterizer.SignalFence(std::move(operation));
288 } else {
289 if (!Settings::IsGPULevelHigh() && counter_type == QueryType::Payload) {
290 if (has_timestamp) {
291 u64 timestamp = impl->gpu.GetTicks();
292 u64 value = static_cast<u64>(payload);
293 std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
294 std::memcpy(pointer, &value, sizeof(value));
295 } else {
296 std::memcpy(pointer, &payload, sizeof(payload));
297 }
298 streamer->Free(new_query_id);
299 return;
300 }
301 impl->rasterizer.SyncOperation(std::move(operation));
302 }
303 if (is_synced) {
304 streamer->Free(new_query_id);
305 return;
306 }
307 auto [cont_addr, base] = gen_caching_indexing(cpu_addr);
308 {
309 std::scoped_lock lock(cache_mutex);
310 auto it1 = cached_queries.try_emplace(cont_addr);
311 auto& sub_container = it1.first->second;
312 auto it_current = sub_container.find(base);
313 if (it_current == sub_container.end()) {
314 sub_container.insert_or_assign(base, query_location);
315 return;
316 }
317 auto* old_query = impl->ObtainQuery(it_current->second);
318 old_query->flags |= QueryFlagBits::IsRewritten;
319 sub_container.insert_or_assign(base, query_location);
320 }
321}
322
323template <typename Traits>
324void QueryCacheBase<Traits>::UnregisterPending() {
325 const auto gen_caching_indexing = [](VAddr cur_addr) {
326 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
327 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
328 };
329 std::scoped_lock lock(cache_mutex);
330 for (QueryLocation loc : impl->pending_unregister) {
331 const auto [streamer_id, query_id] = loc.unpack();
332 auto* streamer = impl->streamers[streamer_id];
333 if (!streamer) [[unlikely]] {
334 continue;
335 }
336 auto* query = streamer->GetQuery(query_id);
337 auto [cont_addr, base] = gen_caching_indexing(query->guest_address);
338 auto it1 = cached_queries.find(cont_addr);
339 if (it1 != cached_queries.end()) {
340 auto it2 = it1->second.find(base);
341 if (it2 != it1->second.end()) {
342 if (it2->second.raw == loc.raw) {
343 it1->second.erase(it2);
344 }
345 }
346 }
347 streamer->Free(query_id);
348 }
349 impl->pending_unregister.clear();
350}
351
352template <typename Traits>
353void QueryCacheBase<Traits>::NotifyWFI() {
354 bool should_sync = false;
355 impl->ForEachStreamer(
356 [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); });
357 if (!should_sync) {
358 return;
359 }
360
361 impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); });
362 impl->runtime.Barriers(true);
363 impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); });
364 impl->runtime.Barriers(false);
365}
366
367template <typename Traits>
368void QueryCacheBase<Traits>::NotifySegment(bool resume) {
369 if (resume) {
370 impl->runtime.ResumeHostConditionalRendering();
371 } else {
372 CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
373 CounterClose(VideoCommon::QueryType::StreamingByteCount);
374 impl->runtime.PauseHostConditionalRendering();
375 }
376}
377
378template <typename Traits>
379bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
380 bool qc_dirty = false;
381 const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData {
382 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address);
383 if (!cpu_addr_opt) [[unlikely]] {
384 return VideoCommon::LookupData{
385 .address = 0,
386 .found_query = nullptr,
387 };
388 }
389 VAddr cpu_addr = *cpu_addr_opt;
390 std::scoped_lock lock(cache_mutex);
391 auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS);
392 if (it1 == cached_queries.end()) {
393 return VideoCommon::LookupData{
394 .address = cpu_addr,
395 .found_query = nullptr,
396 };
397 }
398 auto& sub_container = it1->second;
399 auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK);
400
401 if (it_current == sub_container.end()) {
402 auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4);
403 if (it_current_2 == sub_container.end()) {
404 return VideoCommon::LookupData{
405 .address = cpu_addr,
406 .found_query = nullptr,
407 };
408 }
409 }
410 auto* query = impl->ObtainQuery(it_current->second);
411 qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) &&
412 False(query->flags & QueryFlagBits::IsGuestSynced);
413 return VideoCommon::LookupData{
414 .address = cpu_addr,
415 .found_query = query,
416 };
417 };
418
419 auto& regs = maxwell3d->regs;
420 if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) {
421 impl->runtime.EndHostConditionalRendering();
422 return false;
423 }
424 const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
425 const GPUVAddr address = regs.render_enable.Address();
426 switch (mode) {
427 case ComparisonMode::True:
428 impl->runtime.EndHostConditionalRendering();
429 return false;
430 case ComparisonMode::False:
431 impl->runtime.EndHostConditionalRendering();
432 return false;
433 case ComparisonMode::Conditional: {
434 VideoCommon::LookupData object_1{gen_lookup(address)};
435 return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty);
436 }
437 case ComparisonMode::IfEqual: {
438 VideoCommon::LookupData object_1{gen_lookup(address)};
439 VideoCommon::LookupData object_2{gen_lookup(address + 16)};
440 return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
441 true);
442 }
443 case ComparisonMode::IfNotEqual: {
444 VideoCommon::LookupData object_1{gen_lookup(address)};
445 VideoCommon::LookupData object_2{gen_lookup(address + 16)};
446 return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
447 false);
448 }
449 default:
450 return false;
451 }
452}
453
454// Async downloads
455template <typename Traits>
456void QueryCacheBase<Traits>::CommitAsyncFlushes() {
457 // Make sure to have the results synced in Host.
458 NotifyWFI();
459
460 u64 mask{};
461 {
462 std::scoped_lock lk(impl->flush_guard);
463 impl->ForEachStreamer([&mask](StreamerInterface* streamer) {
464 bool local_result = streamer->HasUnsyncedQueries();
465 if (local_result) {
466 mask |= 1ULL << streamer->GetId();
467 }
468 });
469 impl->flushes_pending.push_back(mask);
470 }
471 std::function<void()> func([this] { UnregisterPending(); });
472 impl->rasterizer.SyncOperation(std::move(func));
473 if (mask == 0) {
474 return;
475 }
476 u64 ran_mask = ~mask;
477 while (mask) {
478 impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
479 u64 dep_mask = streamer->GetDependentMask();
480 if ((dep_mask & ~ran_mask) != 0) {
481 return;
482 }
483 u64 index = streamer->GetId();
484 ran_mask |= (1ULL << index);
485 mask &= ~(1ULL << index);
486 streamer->PushUnsyncedQueries();
487 });
488 }
489}
490
491template <typename Traits>
492bool QueryCacheBase<Traits>::HasUncommittedFlushes() const {
493 bool result = false;
494 impl->ForEachStreamer([&result](StreamerInterface* streamer) {
495 result |= streamer->HasUnsyncedQueries();
496 return result;
497 });
498 return result;
499}
500
501template <typename Traits>
502bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() {
503 std::scoped_lock lk(impl->flush_guard);
504 return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL;
505}
506
507template <typename Traits>
508void QueryCacheBase<Traits>::PopAsyncFlushes() {
509 u64 mask;
510 {
511 std::scoped_lock lk(impl->flush_guard);
512 mask = impl->flushes_pending.front();
513 impl->flushes_pending.pop_front();
514 }
515 if (mask == 0) {
516 return;
517 }
518 u64 ran_mask = ~mask;
519 while (mask) {
520 impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) {
521 u64 dep_mask = streamer->GetDependenceMask();
522 if ((dep_mask & ~ran_mask) != 0) {
523 return;
524 }
525 u64 index = streamer->GetId();
526 ran_mask |= (1ULL << index);
527 mask &= ~(1ULL << index);
528 streamer->PopUnsyncedQueries();
529 });
530 }
531}
532
533// Invalidation
534
535template <typename Traits>
536void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) {
537 auto* query_base = impl->ObtainQuery(location);
538 if (!query_base) {
539 return;
540 }
541 query_base->flags |= QueryFlagBits::IsInvalidated;
542}
543
544template <typename Traits>
545bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
546 auto* query_base = impl->ObtainQuery(location);
547 if (!query_base) {
548 return false;
549 }
550 return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
551 False(query_base->flags & QueryFlagBits::IsGuestSynced);
552}
553
554template <typename Traits>
555bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
556 auto* query_base = impl->ObtainQuery(location);
557 if (!query_base) {
558 return false;
559 }
560 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
561 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
562 auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address);
563 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
564 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
565 return false;
566 }
567 u32 value_l = static_cast<u32>(query_base->value);
568 std::memcpy(ptr, &value_l, sizeof(value_l));
569 return false;
570 }
571 return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
572 False(query_base->flags & QueryFlagBits::IsGuestSynced);
573}
574
575template <typename Traits>
576void QueryCacheBase<Traits>::RequestGuestHostSync() {
577 impl->rasterizer.ReleaseFences();
578}
579
580} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
new file mode 100644
index 000000000..07be421c6
--- /dev/null
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -0,0 +1,181 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <functional>
7#include <mutex>
8#include <optional>
9#include <span>
10#include <unordered_map>
11#include <utility>
12
13#include "common/assert.h"
14#include "common/bit_field.h"
15#include "common/common_types.h"
16#include "core/memory.h"
17#include "video_core/control/channel_state_cache.h"
18#include "video_core/query_cache/query_base.h"
19#include "video_core/query_cache/types.h"
20
21namespace Core::Memory {
22class Memory;
23}
24
25namespace VideoCore {
26class RasterizerInterface;
27}
28
29namespace Tegra {
30class GPU;
31}
32
33namespace VideoCommon {
34
35struct LookupData {
36 VAddr address;
37 QueryBase* found_query;
38};
39
40template <typename Traits>
41class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
42 using RuntimeType = typename Traits::RuntimeType;
43
44public:
45 union QueryLocation {
46 BitField<27, 5, u32> stream_id;
47 BitField<0, 27, u32> query_id;
48 u32 raw;
49
50 std::pair<size_t, size_t> unpack() const {
51 return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
52 }
53 };
54
55 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);
57
58 ~QueryCacheBase();
59
60 void InvalidateRegion(VAddr addr, std::size_t size) {
61 IterateCache<true>(addr, size,
62 [this](QueryLocation location) { InvalidateQuery(location); });
63 }
64
65 void FlushRegion(VAddr addr, std::size_t size) {
66 bool result = false;
67 IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
68 result |= SemiFlushQueryDirty(location);
69 return result;
70 });
71 if (result) {
72 RequestGuestHostSync();
73 }
74 }
75
76 static u64 BuildMask(std::span<const QueryType> types) {
77 u64 mask = 0;
78 for (auto query_type : types) {
79 mask |= 1ULL << (static_cast<u64>(query_type));
80 }
81 return mask;
82 }
83
84 /// Return true when a CPU region is modified from the GPU
85 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) {
86 bool result = false;
87 IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
88 result |= IsQueryDirty(location);
89 return result;
90 });
91 return result;
92 }
93
94 void CounterEnable(QueryType counter_type, bool is_enabled);
95
96 void CounterReset(QueryType counter_type);
97
98 void CounterClose(QueryType counter_type);
99
100 void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,
101 u32 payload, u32 subreport);
102
103 void NotifyWFI();
104
105 bool AccelerateHostConditionalRendering();
106
107 // Async downloads
108 void CommitAsyncFlushes();
109
110 bool HasUncommittedFlushes() const;
111
112 bool ShouldWaitAsyncFlushes();
113
114 void PopAsyncFlushes();
115
116 void NotifySegment(bool resume);
117
118 void BindToChannel(s32 id) override;
119
120protected:
121 template <bool remove_from_cache, typename Func>
122 void IterateCache(VAddr addr, std::size_t size, Func&& func) {
123 static constexpr bool RETURNS_BOOL =
124 std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>;
125 const u64 addr_begin = addr;
126 const u64 addr_end = addr_begin + size;
127
128 const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS;
129 std::scoped_lock lock(cache_mutex);
130 for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) {
131 const u64 page_start = page << Core::Memory::YUZU_PAGEBITS;
132 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
133 const u64 cache_begin = page_start + query_location;
134 const u64 cache_end = cache_begin + sizeof(u32);
135 return cache_begin < addr_end && addr_begin < cache_end;
136 };
137 const auto& it = cached_queries.find(page);
138 if (it == std::end(cached_queries)) {
139 continue;
140 }
141 auto& contents = it->second;
142 for (auto& query : contents) {
143 if (!in_range(query.first)) {
144 continue;
145 }
146 if constexpr (RETURNS_BOOL) {
147 if (func(query.second)) {
148 return;
149 }
150 } else {
151 func(query.second);
152 }
153 }
154 if constexpr (remove_from_cache) {
155 const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) {
156 return in_range(pair.first);
157 };
158 std::erase_if(contents, in_range2);
159 }
160 }
161 }
162
163 using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
164
165 void InvalidateQuery(QueryLocation location);
166 bool IsQueryDirty(QueryLocation location);
167 bool SemiFlushQueryDirty(QueryLocation location);
168 void RequestGuestHostSync();
169 void UnregisterPending();
170
171 std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries;
172 std::mutex cache_mutex;
173
174 struct QueryCacheBaseImpl;
175 friend struct QueryCacheBaseImpl;
176 friend RuntimeType;
177
178 std::unique_ptr<QueryCacheBaseImpl> impl;
179};
180
181} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
new file mode 100644
index 000000000..39da6ac07
--- /dev/null
+++ b/src/video_core/query_cache/query_stream.h
@@ -0,0 +1,149 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <deque>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/query_cache/bank_base.h"
13#include "video_core/query_cache/query_base.h"
14
15namespace VideoCommon {
16
17class StreamerInterface {
18public:
19 explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {}
20 virtual ~StreamerInterface() = default;
21
22 virtual QueryBase* GetQuery(size_t id) = 0;
23
24 virtual void StartCounter() {
25 /* Do Nothing */
26 }
27
28 virtual void PauseCounter() {
29 /* Do Nothing */
30 }
31
32 virtual void ResetCounter() {
33 /* Do Nothing */
34 }
35
36 virtual void CloseCounter() {
37 /* Do Nothing */
38 }
39
40 virtual bool HasPendingSync() const {
41 return false;
42 }
43
44 virtual void PresyncWrites() {
45 /* Do Nothing */
46 }
47
48 virtual void SyncWrites() {
49 /* Do Nothing */
50 }
51
52 virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
53 std::optional<u32> subreport = std::nullopt) = 0;
54
55 virtual bool HasUnsyncedQueries() const {
56 return false;
57 }
58
59 virtual void PushUnsyncedQueries() {
60 /* Do Nothing */
61 }
62
63 virtual void PopUnsyncedQueries() {
64 /* Do Nothing */
65 }
66
67 virtual void Free(size_t query_id) = 0;
68
69 size_t GetId() const {
70 return id;
71 }
72
73 u64 GetDependenceMask() const {
74 return dependence_mask;
75 }
76
77 u64 GetDependentMask() const {
78 return dependence_mask;
79 }
80
81 u64 GetAmmendValue() const {
82 return ammend_value;
83 }
84
85 void SetAccumulationValue(u64 new_value) {
86 acumulation_value = new_value;
87 }
88
89protected:
90 void MakeDependent(StreamerInterface* depend_on) {
91 dependence_mask |= 1ULL << depend_on->id;
92 depend_on->dependent_mask |= 1ULL << id;
93 }
94
95 const size_t id;
96 u64 dependence_mask;
97 u64 dependent_mask;
98 u64 ammend_value{};
99 u64 acumulation_value{};
100};
101
102template <typename QueryType>
103class SimpleStreamer : public StreamerInterface {
104public:
105 explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
106 virtual ~SimpleStreamer() = default;
107
108protected:
109 virtual QueryType* GetQuery(size_t query_id) override {
110 if (query_id < slot_queries.size()) {
111 return &slot_queries[query_id];
112 }
113 return nullptr;
114 }
115
116 virtual void Free(size_t query_id) override {
117 std::scoped_lock lk(guard);
118 ReleaseQuery(query_id);
119 }
120
121 template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))>
122 size_t BuildQuery(Args&&... args) {
123 std::scoped_lock lk(guard);
124 if (!old_queries.empty()) {
125 size_t new_id = old_queries.front();
126 old_queries.pop_front();
127 new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...);
128 return new_id;
129 }
130 size_t new_id = slot_queries.size();
131 slot_queries.emplace_back(std::forward<Args>(args)...);
132 return new_id;
133 }
134
135 void ReleaseQuery(size_t query_id) {
136
137 if (query_id < slot_queries.size()) {
138 old_queries.push_back(query_id);
139 return;
140 }
141 UNREACHABLE();
142 }
143
144 std::mutex guard;
145 std::deque<QueryType> slot_queries;
146 std::deque<size_t> old_queries;
147};
148
149} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h
new file mode 100644
index 000000000..e9226bbfc
--- /dev/null
+++ b/src/video_core/query_cache/types.h
@@ -0,0 +1,74 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
11enum class QueryPropertiesFlags : u32 {
12 HasTimeout = 1 << 0,
13 IsAFence = 1 << 1,
14};
15DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags)
16
17// This should always be equivalent to maxwell3d Report Semaphore Reports
18enum class QueryType : u32 {
19 Payload = 0, // "None" in docs, but confirmed via hardware to return the payload
20 VerticesGenerated = 1,
21 ZPassPixelCount = 2,
22 PrimitivesGenerated = 3,
23 AlphaBetaClocks = 4,
24 VertexShaderInvocations = 5,
25 StreamingPrimitivesNeededMinusSucceeded = 6,
26 GeometryShaderInvocations = 7,
27 GeometryShaderPrimitivesGenerated = 9,
28 ZCullStats0 = 10,
29 StreamingPrimitivesSucceeded = 11,
30 ZCullStats1 = 12,
31 StreamingPrimitivesNeeded = 13,
32 ZCullStats2 = 14,
33 ClipperInvocations = 15,
34 ZCullStats3 = 16,
35 ClipperPrimitivesGenerated = 17,
36 VtgPrimitivesOut = 18,
37 PixelShaderInvocations = 19,
38 ZPassPixelCount64 = 21,
39 IEEECleanColorTarget = 24,
40 IEEECleanZetaTarget = 25,
41 StreamingByteCount = 26,
42 TessellationInitInvocations = 27,
43 BoundingRectangle = 28,
44 TessellationShaderInvocations = 29,
45 TotalStreamingPrimitivesNeededMinusSucceeded = 30,
46 TessellationShaderPrimitivesGenerated = 31,
47 // max.
48 MaxQueryTypes,
49};
50
51// Comparison modes for Host Conditional Rendering
52enum class ComparisonMode : u32 {
53 False = 0,
54 True = 1,
55 Conditional = 2,
56 IfEqual = 3,
57 IfNotEqual = 4,
58 MaxComparisonMode,
59};
60
61// Reduction ops.
62enum class ReductionOp : u32 {
63 RedAdd = 0,
64 RedMin = 1,
65 RedMax = 2,
66 RedInc = 3,
67 RedDec = 4,
68 RedAnd = 5,
69 RedOr = 6,
70 RedXor = 7,
71 MaxReductionOp,
72};
73
74} // namespace VideoCommon \ No newline at end of file