summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-08-04 03:31:52 +0200
committerGravatar Fernando Sahmkow2023-09-23 23:05:29 +0200
commitbdc01254a9b3ce8359f8f007c2102cb2d112418e (patch)
treeb75b974c0751f83089d64957df567e0d138981b1 /src
parentMerge pull request #11567 from liamwhite/fixing-my-error (diff)
downloadyuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.gz
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.xz
yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.zip
Query Cache: Setup Base rework
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/query_cache/bank_base.h106
-rw-r--r--src/video_core/query_cache/query_base.h72
-rw-r--r--src/video_core/query_cache/query_cache.h543
-rw-r--r--src/video_core/query_cache/query_cache_base.h181
-rw-r--r--src/video_core/query_cache/query_stream.h125
-rw-r--r--src/video_core/query_cache/types.h74
7 files changed, 1107 insertions, 0 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 9b13ccbab..cf9266d54 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -95,6 +95,12 @@ add_library(video_core STATIC
95 memory_manager.h 95 memory_manager.h
96 precompiled_headers.h 96 precompiled_headers.h
97 pte_kind.h 97 pte_kind.h
98 query_cache/bank_base.h
99 query_cache/query_base.h
100 query_cache/query_cache_base.h
101 query_cache/query_cache.h
102 query_cache/query_stream.h
103 query_cache/types.h
98 query_cache.h 104 query_cache.h
99 rasterizer_accelerated.cpp 105 rasterizer_accelerated.cpp
100 rasterizer_accelerated.h 106 rasterizer_accelerated.h
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h
new file mode 100644
index 000000000..4246a609d
--- /dev/null
+++ b/src/video_core/query_cache/bank_base.h
@@ -0,0 +1,106 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <atomic>
7#include <deque>
8#include <utility>
9
10
11#include "common/common_types.h"
12
13namespace VideoCommon {
14
15class BankBase {
16protected:
17 const size_t base_bank_size;
18 size_t bank_size;
19 std::atomic<size_t> references;
20 size_t current_slot;
21
22public:
23 BankBase(size_t bank_size_)
24 : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {}
25
26 virtual ~BankBase() = default;
27
28 virtual std::pair<bool, size_t> Reserve() {
29 if (IsClosed()) {
30 return {false, bank_size};
31 }
32 const size_t result = current_slot++;
33 return {true, result};
34 }
35
36 virtual void Reset() {
37 current_slot = 0;
38 references = 0;
39 bank_size = base_bank_size;
40 }
41
42 size_t Size() const {
43 return bank_size;
44 }
45
46 void AddReference(size_t how_many = 1) {
47 references.fetch_add(how_many, std::memory_order_relaxed);
48 }
49
50 void CloseReference(size_t how_many = 1) {
51 if (how_many > references.load(std::memory_order_relaxed)) {
52 UNREACHABLE();
53 }
54 references.fetch_sub(how_many, std::memory_order_relaxed);
55 }
56
57 void Close() {
58 bank_size = current_slot;
59 }
60
61 constexpr bool IsClosed() {
62 return current_slot >= bank_size;
63 }
64
65 bool IsDead() {
66 return IsClosed() && references == 0;
67 }
68};
69
/// Growable pool of banks that recycles the oldest bank once it is dead.
///
/// `BankType` must provide `IsDead()` and `Reset()`.
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;  ///< Storage of every bank ever created.
    std::deque<size_t> bank_indices; ///< Bank indices ordered from oldest to newest reservation.

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserve a bank from the pool and return its index.
    ///
    /// When the oldest reserved bank is dead it is reset and handed out again; otherwise
    /// `builder(bank_pool, new_index)` is invoked and is expected to append a new bank.
    /// @param builder Callable taking `(std::deque<BankType>&, size_t)`.
    /// @return Index of the reserved bank, usable with GetBank().
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty()) {
            const size_t oldest = bank_indices.front();
            if (bank_pool[oldest].IsDead()) {
                bank_indices.pop_front();
                bank_pool[oldest].Reset();
                // Re-queue the recycled bank as the newest reservation. Without this the
                // index would drop out of the queue for good and the bank could never be
                // recycled a second time, making the pool grow without bound.
                bank_indices.push_back(oldest);
                return oldest;
            }
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index.
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
105
106} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
new file mode 100644
index 000000000..485ed669c
--- /dev/null
+++ b/src/video_core/query_cache/query_base.h
@@ -0,0 +1,72 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
/// State bits attached to every query tracked by the query cache.
11enum class QueryFlagBits : u32 {
12 HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp.
13 IsFinalValueSynced = 1 << 1, ///< Indicates that `value` already holds the query's final result.
14 IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host.
15 IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest.
16 IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query.
17 IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query.
18 IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
19 IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
20 IsFence = 1 << 8, ///< Indicates the query is a fence.
21};
22DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
23
24class QueryBase {
25public:
26 VAddr guest_address;
27 QueryFlagBits flags;
28 u64 value;
29
30protected:
31 // Default constructor
32 QueryBase() : guest_address(0), flags{}, value{} {}
33
34 // Parameterized constructor
35 QueryBase(VAddr address, QueryFlagBits flags_, u64 value_)
36 : guest_address(address), flags(flags_), value{value_} {}
37};
38
39class GuestQuery : public QueryBase {
40public:
41 // Parameterized constructor
42 GuestQuery(bool isLong, VAddr address, u64 queryValue)
43 : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) {
44 if (isLong) {
45 flags |= QueryFlagBits::HasTimestamp;
46 }
47 }
48};
49
50class HostQueryBase : public QueryBase {
51public:
52 // Default constructor
53 HostQueryBase()
54 : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{},
55 size_banks{}, start_slot{}, size_slots{} {}
56
57 // Parameterized constructor
58 HostQueryBase(bool isLong, VAddr address)
59 : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{},
60 start_slot{}, size_slots{} {
61 if (isLong) {
62 flags |= QueryFlagBits::HasTimestamp;
63 }
64 }
65
66 u32 start_bank_id;
67 u32 size_banks;
68 size_t start_slot;
69 size_t size_slots;
70};
71
72} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
new file mode 100644
index 000000000..f6af48d14
--- /dev/null
+++ b/src/video_core/query_cache/query_cache.h
@@ -0,0 +1,543 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <array>
7#include <deque>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <utility>
12
13#include "common/assert.h"
14#include "common/common_types.h"
15#include "common/logging/log.h"
16#include "common/scope_exit.h"
17#include "common/settings.h"
18#include "core/memory.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h"
21#include "video_core/memory_manager.h"
22#include "video_core/query_cache/bank_base.h"
23#include "video_core/query_cache/query_base.h"
24#include "video_core/query_cache/query_cache_base.h"
25#include "video_core/query_cache/query_stream.h"
26#include "video_core/query_cache/types.h"
27
28namespace VideoCommon {
29
30using Maxwell = Tegra::Engines::Maxwell3D;
31
/// One pending write handed to the runtime by GuestStreamer::SyncWrites().
32struct SyncValuesStruct {
33 VAddr address; ///< Destination, copied from the query's guest_address.
34 u64 value; ///< Value to write.
35 u64 size; ///< Byte count: 8 for queries with a timestamp, otherwise 4.
36
37 static constexpr bool GeneratesBaseBuffer = true; // NOTE(review): read by the runtime's SyncValues; its meaning is not visible in this file.
38};
39
40template <typename Traits>
41class GuestStreamer : public SimpleStreamer<GuestQuery> {
42public:
43 using RuntimeType = typename Traits::RuntimeType;
44
45 GuestStreamer(size_t id_, RuntimeType& runtime_)
46 : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {}
47
48 virtual ~GuestStreamer() = default;
49
50 size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
51 std::optional<u32> subreport = std::nullopt) override {
52 auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value));
53 pending_sync.push_back(new_id);
54 return new_id;
55 }
56
57 bool HasPendingSync() override {
58 return !pending_sync.empty();
59 }
60
61 void SyncWrites() override {
62 if (pending_sync.empty()) {
63 return;
64 }
65 std::vector<SyncValuesStruct> sync_values;
66 sync_values.reserve(pending_sync.size());
67 for (size_t pending_id : pending_sync) {
68 auto& query = slot_queries[pending_id];
69 if (True(query.flags & QueryFlagBits::IsRewritten) ||
70 True(query.flags & QueryFlagBits::IsInvalidated)) {
71 continue;
72 }
73 query.flags |= QueryFlagBits::IsHostSynced;
74 sync_values.emplace_back(query.guest_address, query.value,
75 True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4);
76 }
77 pending_sync.clear();
78 if (sync_values.size() > 0) {
79 runtime.template SyncValues<SyncValuesStruct>(sync_values);
80 }
81 }
82
83private:
84 RuntimeType& runtime;
85 std::deque<size_t> pending_sync;
86};
87
88template <typename Traits>
89class StubStreamer : public GuestStreamer<Traits> {
90public:
91 using RuntimeType = typename Traits::RuntimeType;
92
93 StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {}
94
95 ~StubStreamer() override = default;
96
97 size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value,
98 std::optional<u32> subreport = std::nullopt) override {
99 size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport);
100 return new_id;
101 }
102};
103
104template <typename Traits>
105struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
106 using RuntimeType = typename Traits::RuntimeType;
107
108 QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
109 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_)
110 : owner{owner_}, rasterizer{rasterizer_},
111 cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} {
112 streamer_mask = 0;
113 for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
114 streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
115 if (streamers[i]) {
116 streamer_mask |= 1ULL << i;
117 }
118 }
119 }
120
121 template <typename Func>
122 void ForEachStreamerIn(u64 mask, Func&& func) {
123 static constexpr bool RETURNS_BOOL =
124 std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>;
125 while (mask != 0) {
126 size_t position = std::countr_zero(mask);
127 mask &= ~(1ULL << position);
128 if constexpr (RETURNS_BOOL) {
129 if (func(streamers[position])) {
130 return;
131 }
132 } else {
133 func(streamers[position]);
134 }
135 }
136 }
137
138 template <typename Func>
139 void ForEachStreamer(Func&& func) {
140 ForEachStreamerIn(streamer_mask, func);
141 }
142
143 QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) {
144 size_t which_stream = location.stream_id.Value();
145 auto* streamer = streamers[which_stream];
146 if (!streamer) {
147 return nullptr;
148 }
149 return streamer->GetQuery(location.query_id.Value());
150 }
151
152 QueryCacheBase<Traits>* owner;
153 VideoCore::RasterizerInterface& rasterizer;
154 Core::Memory::Memory& cpu_memory;
155 Traits::RuntimeType& runtime;
156 Tegra::GPU& gpu;
157 std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers;
158 u64 streamer_mask;
159 std::mutex flush_guard;
160 std::deque<u64> flushes_pending;
161 std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister;
162};
163
164template <typename Traits>
165QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_,
166 VideoCore::RasterizerInterface& rasterizer_,
167 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_)
168 : cached_queries{} {
169 impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>(
170 this, rasterizer_, cpu_memory_, runtime_, gpu_);
171}
172
173template <typename Traits>
174QueryCacheBase<Traits>::~QueryCacheBase() = default;
175
176template <typename Traits>
177void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) {
178 size_t index = static_cast<size_t>(counter_type);
179 StreamerInterface* streamer = impl->streamers[index];
180 if (!streamer) [[unlikely]] {
181 UNREACHABLE();
182 return;
183 }
184 if (is_enabled) {
185 streamer->StartCounter();
186 } else {
187 streamer->PauseCounter();
188 }
189}
190
191template <typename Traits>
192void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) {
193 size_t index = static_cast<size_t>(counter_type);
194 StreamerInterface* streamer = impl->streamers[index];
195 if (!streamer) [[unlikely]] {
196 UNREACHABLE();
197 return;
198 }
199 streamer->CloseCounter();
200}
201
202template <typename Traits>
203void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) {
204 size_t index = static_cast<size_t>(counter_type);
205 StreamerInterface* streamer = impl->streamers[index];
206 if (!streamer) [[unlikely]] {
207 UNIMPLEMENTED();
208 return;
209 }
210 streamer->ResetCounter();
211}
212
213template <typename Traits>
214void QueryCacheBase<Traits>::BindToChannel(s32 id) {
215 VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id);
216 impl->runtime.Bind3DEngine(maxwell3d);
217}
218
219template <typename Traits>
220void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type,
221 QueryPropertiesFlags flags, u32 payload, u32 subreport) {
222 const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout);
223 const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence);
224 size_t streamer_id = static_cast<size_t>(counter_type);
225 auto* streamer = impl->streamers[streamer_id];
226 if (!streamer) [[unlikely]] {
227 if (has_timestamp) {
228 u64 timestamp = impl->gpu.GetTicks();
229 gpu_memory->Write<u64>(addr + 8, timestamp);
230 gpu_memory->Write<u64>(addr, 1ULL);
231 } else {
232 gpu_memory->Write<u32>(addr, 1U);
233 }
234 return;
235 }
236 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr);
237 if (!cpu_addr_opt) [[unlikely]] {
238 return;
239 }
240 VAddr cpu_addr = *cpu_addr_opt;
241 const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport);
242 auto* query = streamer->GetQuery(new_query_id);
243 if (is_fence) {
244 query->flags |= QueryFlagBits::IsFence;
245 }
246 QueryLocation query_location{};
247 query_location.stream_id.Assign(static_cast<u32>(streamer_id));
248 query_location.query_id.Assign(static_cast<u32>(new_query_id));
249 const auto gen_caching_indexing = [](VAddr cur_addr) {
250 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
251 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
252 };
253 u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
254 u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
255 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
256 std::function<void()> operation(
257 [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] {
258 if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
259 if (!is_synced) [[likely]] {
260 impl->pending_unregister.push_back(query_location);
261 }
262 return;
263 }
264 if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
265 UNREACHABLE();
266 return;
267 }
268 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
269 u64 timestamp = impl->gpu.GetTicks();
270 std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
271 std::memcpy(pointer, &query_base->value, sizeof(query_base->value));
272 } else {
273 u32 value = static_cast<u32>(query_base->value);
274 std::memcpy(pointer, &value, sizeof(value));
275 }
276 if (!is_synced) [[likely]] {
277 impl->pending_unregister.push_back(query_location);
278 }
279 });
280 if (is_fence) {
281 impl->rasterizer.SignalFence(std::move(operation));
282 } else {
283 impl->rasterizer.SyncOperation(std::move(operation));
284 }
285 if (is_synced) {
286 streamer->Free(new_query_id);
287 return;
288 }
289 auto [cont_addr, base] = gen_caching_indexing(cpu_addr);
290 {
291 std::scoped_lock lock(cache_mutex);
292 auto it1 = cached_queries.try_emplace(cont_addr);
293 auto& sub_container = it1.first->second;
294 auto it_current = sub_container.find(base);
295 if (it_current == sub_container.end()) {
296 sub_container.insert_or_assign(base, query_location);
297 return;
298 }
299 auto* old_query = impl->ObtainQuery(it_current->second);
300 old_query->flags |= QueryFlagBits::IsRewritten;
301 sub_container.insert_or_assign(base, query_location);
302 }
303}
304
305template <typename Traits>
306void QueryCacheBase<Traits>::UnregisterPending() {
307 const auto gen_caching_indexing = [](VAddr cur_addr) {
308 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
309 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
310 };
311 std::scoped_lock lock(cache_mutex);
312 for (QueryLocation loc : impl->pending_unregister) {
313 const auto [streamer_id, query_id] = loc.unpack();
314 auto* streamer = impl->streamers[streamer_id];
315 if (!streamer) [[unlikely]] {
316 continue;
317 }
318 auto* query = streamer->GetQuery(query_id);
319 auto [cont_addr, base] = gen_caching_indexing(query->guest_address);
320 auto it1 = cached_queries.find(cont_addr);
321 if (it1 != cached_queries.end()) {
322 auto it2 = it1->second.find(base);
323 if (it2 != it1->second.end()) {
324 if (it2->second.raw == loc.raw) {
325 it1->second.erase(it2);
326 }
327 }
328 }
329 streamer->Free(query_id);
330 }
331 impl->pending_unregister.clear();
332}
333
334template <typename Traits>
335void QueryCacheBase<Traits>::NotifyWFI() {
336 bool should_sync = false;
337 impl->ForEachStreamer(
338 [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); });
339 if (!should_sync) {
340 return;
341 }
342
343 impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); });
344 impl->runtime.Barriers(true);
345 impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); });
346 impl->runtime.Barriers(false);
347}
348
349template <typename Traits>
350void QueryCacheBase<Traits>::NotifySegment(bool resume) {
351 if (resume) {
352 impl->runtime.ResumeHostConditionalRendering();
353 } else {
354 impl->runtime.PauseHostConditionalRendering();
355 CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
356 CounterClose(VideoCommon::QueryType::StreamingByteCount);
357 }
358}
359
360template <typename Traits>
361bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() {
362 bool qc_dirty = false;
363 const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData {
364 auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address);
365 if (!cpu_addr_opt) [[unlikely]] {
366 return VideoCommon::LookupData{
367 .address = 0,
368 .found_query = nullptr,
369 };
370 }
371 VAddr cpu_addr = *cpu_addr_opt;
372 std::scoped_lock lock(cache_mutex);
373 auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS);
374 if (it1 == cached_queries.end()) {
375 return VideoCommon::LookupData{
376 .address = cpu_addr,
377 .found_query = nullptr,
378 };
379 }
380 auto& sub_container = it1->second;
381 auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK);
382
383 if (it_current == sub_container.end()) {
384 auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4);
385 if (it_current_2 == sub_container.end()) {
386 return VideoCommon::LookupData{
387 .address = cpu_addr,
388 .found_query = nullptr,
389 };
390 }
391 }
392 auto* query = impl->ObtainQuery(it_current->second);
393 qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) &&
394 False(query->flags & QueryFlagBits::IsGuestSynced);
395 return VideoCommon::LookupData{
396 .address = cpu_addr,
397 .found_query = query,
398 };
399 };
400
401 auto& regs = maxwell3d->regs;
402 if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) {
403 impl->runtime.EndHostConditionalRendering();
404 return false;
405 }
406 /*if (!Settings::IsGPULevelHigh()) {
407 impl->runtime.EndHostConditionalRendering();
408 return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24,
409 VideoCommon::CacheType::BufferCache |
410 VideoCommon::CacheType::QueryCache);
411 }*/
412 const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode);
413 const GPUVAddr address = regs.render_enable.Address();
414 switch (mode) {
415 case ComparisonMode::True:
416 impl->runtime.EndHostConditionalRendering();
417 return false;
418 case ComparisonMode::False:
419 impl->runtime.EndHostConditionalRendering();
420 return false;
421 case ComparisonMode::Conditional: {
422 VideoCommon::LookupData object_1{gen_lookup(address)};
423 return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty);
424 }
425 case ComparisonMode::IfEqual: {
426 VideoCommon::LookupData object_1{gen_lookup(address)};
427 VideoCommon::LookupData object_2{gen_lookup(address + 16)};
428 return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
429 true);
430 }
431 case ComparisonMode::IfNotEqual: {
432 VideoCommon::LookupData object_1{gen_lookup(address)};
433 VideoCommon::LookupData object_2{gen_lookup(address + 16)};
434 return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty,
435 false);
436 }
437 default:
438 return false;
439 }
440}
441
442// Async downloads
443template <typename Traits>
444void QueryCacheBase<Traits>::CommitAsyncFlushes() {
445 u64 mask{};
446 {
447 std::scoped_lock lk(impl->flush_guard);
448 impl->ForEachStreamer([&mask](StreamerInterface* streamer) {
449 bool local_result = streamer->HasUnsyncedQueries();
450 if (local_result) {
451 mask |= 1ULL << streamer->GetId();
452 }
453 });
454 impl->flushes_pending.push_back(mask);
455 }
456 std::function<void()> func([this] { UnregisterPending(); });
457 impl->rasterizer.SyncOperation(std::move(func));
458 if (mask == 0) {
459 return;
460 }
461 impl->ForEachStreamerIn(mask,
462 [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); });
463}
464
465template <typename Traits>
466bool QueryCacheBase<Traits>::HasUncommittedFlushes() const {
467 bool result = false;
468 impl->ForEachStreamer([&result](StreamerInterface* streamer) {
469 result |= streamer->HasUnsyncedQueries();
470 return result;
471 });
472 return result;
473}
474
475template <typename Traits>
476bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() {
477 std::scoped_lock lk(impl->flush_guard);
478 return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL;
479}
480
481template <typename Traits>
482void QueryCacheBase<Traits>::PopAsyncFlushes() {
483 u64 mask;
484 {
485 std::scoped_lock lk(impl->flush_guard);
486 mask = impl->flushes_pending.front();
487 impl->flushes_pending.pop_front();
488 }
489 if (mask == 0) {
490 return;
491 }
492 impl->ForEachStreamerIn(mask,
493 [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); });
494}
495
496// Invalidation
497
498template <typename Traits>
499void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) {
500 auto* query_base = impl->ObtainQuery(location);
501 if (!query_base) {
502 return;
503 }
504 query_base->flags |= QueryFlagBits::IsInvalidated;
505}
506
507template <typename Traits>
508bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
509 auto* query_base = impl->ObtainQuery(location);
510 if (!query_base) {
511 return false;
512 }
513 return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
514 False(query_base->flags & QueryFlagBits::IsGuestSynced);
515}
516
517template <typename Traits>
518bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) {
519 auto* query_base = impl->ObtainQuery(location);
520 if (!query_base) {
521 return false;
522 }
523 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
524 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
525 auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address);
526 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
527 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
528 return false;
529 }
530 u32 value_l = static_cast<u32>(query_base->value);
531 std::memcpy(ptr, &value_l, sizeof(value_l));
532 return false;
533 }
534 return True(query_base->flags & QueryFlagBits::IsHostManaged) &&
535 False(query_base->flags & QueryFlagBits::IsGuestSynced);
536}
537
/// Asks the rasterizer to release its fences. FlushRegion() calls this when a flushed range
/// still contains dirty queries.
538template <typename Traits>
539void QueryCacheBase<Traits>::RequestGuestHostSync() {
540 impl->rasterizer.ReleaseFences();
541}
542
543} // namespace VideoCommon
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h
new file mode 100644
index 000000000..55f508dd1
--- /dev/null
+++ b/src/video_core/query_cache/query_cache_base.h
@@ -0,0 +1,181 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <functional>
7#include <mutex>
8#include <optional>
9#include <span>
10#include <unordered_map>
11#include <utility>
12
13#include "common/assert.h"
14#include "common/bit_field.h"
15#include "common/common_types.h"
16#include "core/memory.h"
17#include "video_core/control/channel_state_cache.h"
18#include "video_core/query_cache/query_base.h"
19#include "video_core/query_cache/types.h"
20
21namespace Core::Memory {
22class Memory;
23}
24
25namespace VideoCore {
26class RasterizerInterface;
27}
28
29namespace Tegra {
30class GPU;
31}
32
33namespace VideoCommon {
34
/// Result of looking up one comparison operand for host conditional rendering.
35struct LookupData {
36 VAddr address; ///< CPU address of the operand; 0 when the GPU address could not be translated.
37 QueryBase* found_query; ///< Cached query at that address, or nullptr when none is registered.
38};
39
40template <typename Traits>
41class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
42 using RuntimeType = typename Traits::RuntimeType;
43
44public:
45 union QueryLocation {
46 BitField<27, 5, u32> stream_id;
47 BitField<0, 27, u32> query_id;
48 u32 raw;
49
50 std::pair<size_t, size_t> unpack() {
51 return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())};
52 }
53 };
54
55 explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_,
56 Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_);
57
58 ~QueryCacheBase();
59
60 void InvalidateRegion(VAddr addr, std::size_t size) {
61 IterateCache<true>(addr, size,
62 [this](QueryLocation location) { InvalidateQuery(location); });
63 }
64
65 void FlushRegion(VAddr addr, std::size_t size) {
66 bool result = false;
67 IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
68 result |= SemiFlushQueryDirty(location);
69 return result;
70 });
71 if (result) {
72 RequestGuestHostSync();
73 }
74 }
75
76 static u64 BuildMask(std::span<QueryType> types) {
77 u64 mask = 0;
78 for (auto query_type : types) {
79 mask |= 1ULL << (static_cast<u64>(query_type));
80 }
81 return mask;
82 }
83
84 /// Return true when a CPU region is modified from the GPU
85 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) {
86 bool result = false;
87 IterateCache<false>(addr, size, [this, &result](QueryLocation location) {
88 result |= IsQueryDirty(location);
89 return result;
90 });
91 return result;
92 }
93
94 void CounterEnable(QueryType counter_type, bool is_enabled);
95
96 void CounterReset(QueryType counter_type);
97
98 void CounterClose(QueryType counter_type);
99
100 void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags,
101 u32 payload, u32 subreport);
102
103 void NotifyWFI();
104
105 bool AccelerateHostConditionalRendering();
106
107 // Async downloads
108 void CommitAsyncFlushes();
109
110 bool HasUncommittedFlushes() const;
111
112 bool ShouldWaitAsyncFlushes();
113
114 void PopAsyncFlushes();
115
116 void NotifySegment(bool resume);
117
118 void BindToChannel(s32 id) override;
119
120protected:
121 template <bool remove_from_cache, typename Func>
122 void IterateCache(VAddr addr, std::size_t size, Func&& func) {
123 static constexpr bool RETURNS_BOOL =
124 std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>;
125 const u64 addr_begin = addr;
126 const u64 addr_end = addr_begin + size;
127
128 const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS;
129 std::scoped_lock lock(cache_mutex);
130 for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) {
131 const u64 page_start = page << Core::Memory::YUZU_PAGEBITS;
132 const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) {
133 const u64 cache_begin = page_start + query_location;
134 const u64 cache_end = cache_begin + sizeof(u32);
135 return cache_begin < addr_end && addr_begin < cache_end;
136 };
137 const auto& it = cached_queries.find(page);
138 if (it == std::end(cached_queries)) {
139 continue;
140 }
141 auto& contents = it->second;
142 for (auto& query : contents) {
143 if (!in_range(query.first)) {
144 continue;
145 }
146 if constexpr (RETURNS_BOOL) {
147 if (func(query.second)) {
148 return;
149 }
150 } else {
151 func(query.second);
152 }
153 }
154 if constexpr (remove_from_cache) {
155 const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) {
156 return in_range(pair.first);
157 };
158 std::erase_if(contents, in_range2);
159 }
160 }
161 }
162
163 using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>;
164
165 void InvalidateQuery(QueryLocation location);
166 bool IsQueryDirty(QueryLocation location);
167 bool SemiFlushQueryDirty(QueryLocation location);
168 void RequestGuestHostSync();
169 void UnregisterPending();
170
171 std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries;
172 std::mutex cache_mutex;
173
174 struct QueryCacheBaseImpl;
175 friend struct QueryCacheBaseImpl;
176 friend RuntimeType;
177
178 std::unique_ptr<struct QueryCacheBaseImpl> impl;
179};
180
181} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
new file mode 100644
index 000000000..dd5f95b3c
--- /dev/null
+++ b/src/video_core/query_cache/query_stream.h
@@ -0,0 +1,125 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <deque>
7#include <optional>
8#include <vector>
9
10#include "common/assert.h"
11#include "common/common_types.h"
12#include "video_core/query_cache/bank_base.h"
13#include "video_core/query_cache/query_base.h"
14
15namespace VideoCommon {
16
17class StreamerInterface {
18public:
19 StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {}
20 virtual ~StreamerInterface() = default;
21
22 virtual QueryBase* GetQuery(size_t id) = 0;
23
24 virtual void StartCounter() {
25 /* Do Nothing */
26 }
27
28 virtual void PauseCounter() {
29 /* Do Nothing */
30 }
31
32 virtual void ResetCounter() {
33 /* Do Nothing */
34 }
35
36 virtual void CloseCounter() {
37 /* Do Nothing */
38 }
39
40 virtual bool HasPendingSync() {
41 return false;
42 }
43
44 virtual void PresyncWrites() {
45 /* Do Nothing */
46 }
47
48 virtual void SyncWrites() {
49 /* Do Nothing */
50 }
51
52 virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
53 std::optional<u32> subreport = std::nullopt) = 0;
54
55 virtual bool HasUnsyncedQueries() {
56 return false;
57 }
58
59 virtual void PushUnsyncedQueries() {
60 /* Do Nothing */
61 }
62
63 virtual void PopUnsyncedQueries() {
64 /* Do Nothing */
65 }
66
67 virtual void Free(size_t query_id) = 0;
68
69 size_t GetId() const {
70 return id;
71 }
72
73protected:
74 const size_t id;
75 const u64 dependance_mask;
76};
77
78template <typename QueryType>
79class SimpleStreamer : public StreamerInterface {
80public:
81 SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
82 virtual ~SimpleStreamer() = default;
83
84protected:
85 virtual QueryType* GetQuery(size_t query_id) override {
86 if (query_id < slot_queries.size()) {
87 return &slot_queries[query_id];
88 }
89 return nullptr;
90 }
91
92 virtual void Free(size_t query_id) override {
93 std::scoped_lock lk(guard);
94 ReleaseQuery(query_id);
95 }
96
97 template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))>
98 size_t BuildQuery(Args&&... args) {
99 std::scoped_lock lk(guard);
100 if (!old_queries.empty()) {
101 size_t new_id = old_queries.front();
102 old_queries.pop_front();
103 new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...);
104 return new_id;
105 }
106 size_t new_id = slot_queries.size();
107 slot_queries.emplace_back(std::forward<Args>(args)...);
108 return new_id;
109 }
110
111 void ReleaseQuery(size_t query_id) {
112
113 if (query_id < slot_queries.size()) {
114 old_queries.push_back(query_id);
115 return;
116 }
117 UNREACHABLE();
118 }
119
120 std::mutex guard;
121 std::deque<QueryType> slot_queries;
122 std::deque<size_t> old_queries;
123};
124
125} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h
new file mode 100644
index 000000000..e9226bbfc
--- /dev/null
+++ b/src/video_core/query_cache/types.h
@@ -0,0 +1,74 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include "common/common_funcs.h"
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
/// Properties of a counter report, as passed to QueryCacheBase::CounterReport().
11enum class QueryPropertiesFlags : u32 {
12 HasTimeout = 1 << 0, ///< CounterReport() reads this bit as "the report carries a timestamp".
13 IsAFence = 1 << 1, ///< CounterReport() flags the query IsFence and signals it as a fence.
14};
15DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags)
16
17// This should always be equivalent to maxwell3d Report Semaphore Reports
/// Kind of counter a report refers to. The numbering has gaps: 8, 20, 22 and 23 have no
/// enumerator here.
18enum class QueryType : u32 {
19 Payload = 0, // "None" in docs, but confirmed via hardware to return the payload
20 VerticesGenerated = 1,
21 ZPassPixelCount = 2,
22 PrimitivesGenerated = 3,
23 AlphaBetaClocks = 4,
24 VertexShaderInvocations = 5,
25 StreamingPrimitivesNeededMinusSucceeded = 6,
26 GeometryShaderInvocations = 7,
27 GeometryShaderPrimitivesGenerated = 9,
28 ZCullStats0 = 10,
29 StreamingPrimitivesSucceeded = 11,
30 ZCullStats1 = 12,
31 StreamingPrimitivesNeeded = 13,
32 ZCullStats2 = 14,
33 ClipperInvocations = 15,
34 ZCullStats3 = 16,
35 ClipperPrimitivesGenerated = 17,
36 VtgPrimitivesOut = 18,
37 PixelShaderInvocations = 19,
38 ZPassPixelCount64 = 21,
39 IEEECleanColorTarget = 24,
40 IEEECleanZetaTarget = 25,
41 StreamingByteCount = 26,
42 TessellationInitInvocations = 27,
43 BoundingRectangle = 28,
44 TessellationShaderInvocations = 29,
45 TotalStreamingPrimitivesNeededMinusSucceeded = 30,
46 TessellationShaderPrimitivesGenerated = 31,
47 // max.
48 MaxQueryTypes, ///< One past the last type (32); the query cache sizes its per-type streamer array with it.
49};
50
51// Comparison modes for Host Conditional Rendering
/// Predicate modes handled by QueryCacheBase::AccelerateHostConditionalRendering(), which
/// casts the render-enable register's mode field to this enum.
52enum class ComparisonMode : u32 {
53 False = 0, ///< Host conditional rendering is ended; nothing is accelerated.
54 True = 1, ///< Host conditional rendering is ended; nothing is accelerated.
55 Conditional = 2, ///< Single operand looked up at the render-enable address.
56 IfEqual = 3, ///< Two operands (address and address + 16), compared for equality.
57 IfNotEqual = 4, ///< Two operands (address and address + 16), compared for inequality.
58 MaxComparisonMode, // NOTE(review): presumably a one-past-the-end sentinel; not referenced in the visible sources.
59};
60
61// Reduction ops.
/// Reduction operation selectors. Not referenced by the other query-cache headers shown
/// alongside this one.
62enum class ReductionOp : u32 {
63 RedAdd = 0,
64 RedMin = 1,
65 RedMax = 2,
66 RedInc = 3,
67 RedDec = 4,
68 RedAnd = 5,
69 RedOr = 6,
70 RedXor = 7,
71 MaxReductionOp, // NOTE(review): presumably a one-past-the-end sentinel; confirm against users.
72};
73
74} // namespace VideoCommon \ No newline at end of file