summaryrefslogtreecommitdiff
path: root/src/video_core/query_cache.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/query_cache.h')
-rw-r--r--src/video_core/query_cache.h359
1 files changed, 359 insertions, 0 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..e66054ed0
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,359 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <cstring>
10#include <iterator>
11#include <memory>
12#include <mutex>
13#include <optional>
14#include <unordered_map>
15#include <vector>
16
17#include "common/assert.h"
18#include "core/core.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h"
21#include "video_core/memory_manager.h"
22#include "video_core/rasterizer_interface.h"
23
24namespace VideoCommon {
25
26template <class QueryCache, class HostCounter>
27class CounterStreamBase {
28public:
29 explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
30 : cache{cache}, type{type} {}
31
32 /// Updates the state of the stream, enabling or disabling as needed.
33 void Update(bool enabled) {
34 if (enabled) {
35 Enable();
36 } else {
37 Disable();
38 }
39 }
40
41 /// Resets the stream to zero. It doesn't disable the query after resetting.
42 void Reset() {
43 if (current) {
44 current->EndQuery();
45
46 // Immediately start a new query to avoid disabling its state.
47 current = cache.Counter(nullptr, type);
48 }
49 last = nullptr;
50 }
51
52 /// Returns the current counter slicing as needed.
53 std::shared_ptr<HostCounter> Current() {
54 if (!current) {
55 return nullptr;
56 }
57 current->EndQuery();
58 last = std::move(current);
59 current = cache.Counter(last, type);
60 return last;
61 }
62
63 /// Returns true when the counter stream is enabled.
64 bool IsEnabled() const {
65 return current != nullptr;
66 }
67
68private:
69 /// Enables the stream.
70 void Enable() {
71 if (current) {
72 return;
73 }
74 current = cache.Counter(last, type);
75 }
76
77 // Disables the stream.
78 void Disable() {
79 if (current) {
80 current->EndQuery();
81 }
82 last = std::exchange(current, nullptr);
83 }
84
85 QueryCache& cache;
86 const VideoCore::QueryType type;
87
88 std::shared_ptr<HostCounter> current;
89 std::shared_ptr<HostCounter> last;
90};
91
92template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
93 class QueryPool>
94class QueryCacheBase {
95public:
96 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
97 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
98 static_cast<QueryCache&>(*this),
99 VideoCore::QueryType::SamplesPassed}}} {}
100
101 void InvalidateRegion(CacheAddr addr, std::size_t size) {
102 std::unique_lock lock{mutex};
103 FlushAndRemoveRegion(addr, size);
104 }
105
106 void FlushRegion(CacheAddr addr, std::size_t size) {
107 std::unique_lock lock{mutex};
108 FlushAndRemoveRegion(addr, size);
109 }
110
111 /**
112 * Records a query in GPU mapped memory, potentially marked with a timestamp.
113 * @param gpu_addr GPU address to flush to when the mapped memory is read.
114 * @param type Query type, e.g. SamplesPassed.
115 * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
116 */
117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
118 std::unique_lock lock{mutex};
119 auto& memory_manager = system.GPU().MemoryManager();
120 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
121
122 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
123 if (!query) {
124 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
125 ASSERT_OR_EXECUTE(cpu_addr, return;);
126
127 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
128 }
129
130 query->BindCounter(Stream(type).Current(), timestamp);
131 }
132
133 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
134 void UpdateCounters() {
135 std::unique_lock lock{mutex};
136 const auto& regs = system.GPU().Maxwell3D().regs;
137 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
138 }
139
140 /// Resets a counter to zero. It doesn't disable the query after resetting.
141 void ResetCounter(VideoCore::QueryType type) {
142 std::unique_lock lock{mutex};
143 Stream(type).Reset();
144 }
145
146 /// Disable all active streams. Expected to be called at the end of a command buffer.
147 void DisableStreams() {
148 std::unique_lock lock{mutex};
149 for (auto& stream : streams) {
150 stream.Update(false);
151 }
152 }
153
154 /// Returns a new host counter.
155 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
156 VideoCore::QueryType type) {
157 return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
158 type);
159 }
160
161 /// Returns the counter stream of the specified type.
162 CounterStream& Stream(VideoCore::QueryType type) {
163 return streams[static_cast<std::size_t>(type)];
164 }
165
166 /// Returns the counter stream of the specified type.
167 const CounterStream& Stream(VideoCore::QueryType type) const {
168 return streams[static_cast<std::size_t>(type)];
169 }
170
171protected:
172 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
173
174private:
175 /// Flushes a memory range to guest memory and removes it from the cache.
176 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
177 const u64 addr_begin = static_cast<u64>(addr);
178 const u64 addr_end = addr_begin + static_cast<u64>(size);
179 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
180 const u64 cache_begin = query.GetCacheAddr();
181 const u64 cache_end = cache_begin + query.SizeInBytes();
182 return cache_begin < addr_end && addr_begin < cache_end;
183 };
184
185 const u64 page_end = addr_end >> PAGE_SHIFT;
186 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
187 const auto& it = cached_queries.find(page);
188 if (it == std::end(cached_queries)) {
189 continue;
190 }
191 auto& contents = it->second;
192 for (auto& query : contents) {
193 if (!in_range(query)) {
194 continue;
195 }
196 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
197 query.Flush();
198 }
199 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
200 std::end(contents));
201 }
202 }
203
204 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
205 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
206 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
207 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
208 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
209 host_ptr);
210 }
211
212 /// Tries to a get a cached query. Returns nullptr on failure.
213 CachedQuery* TryGet(CacheAddr addr) {
214 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
215 const auto it = cached_queries.find(page);
216 if (it == std::end(cached_queries)) {
217 return nullptr;
218 }
219 auto& contents = it->second;
220 const auto found =
221 std::find_if(std::begin(contents), std::end(contents),
222 [addr](auto& query) { return query.GetCacheAddr() == addr; });
223 return found != std::end(contents) ? &*found : nullptr;
224 }
225
226 static constexpr std::uintptr_t PAGE_SIZE = 4096;
227 static constexpr unsigned PAGE_SHIFT = 12;
228
229 Core::System& system;
230 VideoCore::RasterizerInterface& rasterizer;
231
232 std::recursive_mutex mutex;
233
234 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
235
236 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
237};
238
239template <class QueryCache, class HostCounter>
240class HostCounterBase {
241public:
242 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
243 : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
244 // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
245 constexpr u64 depth_threshold = 96;
246 if (depth > depth_threshold) {
247 depth = 0;
248 base_result = dependency->Query();
249 dependency = nullptr;
250 }
251 }
252 virtual ~HostCounterBase() = default;
253
254 /// Returns the current value of the query.
255 u64 Query() {
256 if (result) {
257 return *result;
258 }
259
260 u64 value = BlockingQuery() + base_result;
261 if (dependency) {
262 value += dependency->Query();
263 dependency = nullptr;
264 }
265
266 result = value;
267 return *result;
268 }
269
270 /// Returns true when flushing this query will potentially wait.
271 bool WaitPending() const noexcept {
272 return result.has_value();
273 }
274
275 u64 Depth() const noexcept {
276 return depth;
277 }
278
279protected:
280 /// Returns the value of query from the backend API blocking as needed.
281 virtual u64 BlockingQuery() const = 0;
282
283private:
284 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
285 std::optional<u64> result; ///< Filled with the already returned value.
286 u64 depth; ///< Number of nested dependencies.
287 u64 base_result = 0; ///< Equivalent to nested dependencies value.
288};
289
290template <class HostCounter>
291class CachedQueryBase {
292public:
293 explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
294 : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
295 virtual ~CachedQueryBase() = default;
296
297 CachedQueryBase(CachedQueryBase&&) noexcept = default;
298 CachedQueryBase(const CachedQueryBase&) = delete;
299
300 CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
301 CachedQueryBase& operator=(const CachedQueryBase&) = delete;
302
303 /// Flushes the query to guest memory.
304 virtual void Flush() {
305 // When counter is nullptr it means that it's just been reseted. We are supposed to write a
306 // zero in these cases.
307 const u64 value = counter ? counter->Query() : 0;
308 std::memcpy(host_ptr, &value, sizeof(u64));
309
310 if (timestamp) {
311 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
312 }
313 }
314
315 /// Binds a counter to this query.
316 void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
317 if (counter) {
318 // If there's an old counter set it means the query is being rewritten by the game.
319 // To avoid losing the data forever, flush here.
320 Flush();
321 }
322 counter = std::move(counter_);
323 timestamp = timestamp_;
324 }
325
326 VAddr CpuAddr() const noexcept {
327 return cpu_addr;
328 }
329
330 CacheAddr GetCacheAddr() const noexcept {
331 return ToCacheAddr(host_ptr);
332 }
333
334 u64 SizeInBytes() const noexcept {
335 return SizeInBytes(timestamp.has_value());
336 }
337
338 static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
339 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
340 }
341
342protected:
343 /// Returns true when querying the counter may potentially block.
344 bool WaitPending() const noexcept {
345 return counter && counter->WaitPending();
346 }
347
348private:
349 static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
350 static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
351 static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
352
353 VAddr cpu_addr; ///< Guest CPU address.
354 u8* host_ptr; ///< Writable host pointer.
355 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
356 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
357};
358
359} // namespace VideoCommon