diff options
Diffstat (limited to 'src/video_core/query_cache.h')
| -rw-r--r-- | src/video_core/query_cache.h | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..e66054ed0 --- /dev/null +++ b/src/video_core/query_cache.h | |||
| @@ -0,0 +1,359 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <cstring> | ||
| 10 | #include <iterator> | ||
| 11 | #include <memory> | ||
| 12 | #include <mutex> | ||
| 13 | #include <optional> | ||
| 14 | #include <unordered_map> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "core/core.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/gpu.h" | ||
| 21 | #include "video_core/memory_manager.h" | ||
| 22 | #include "video_core/rasterizer_interface.h" | ||
| 23 | |||
| 24 | namespace VideoCommon { | ||
| 25 | |||
| 26 | template <class QueryCache, class HostCounter> | ||
| 27 | class CounterStreamBase { | ||
| 28 | public: | ||
| 29 | explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) | ||
| 30 | : cache{cache}, type{type} {} | ||
| 31 | |||
| 32 | /// Updates the state of the stream, enabling or disabling as needed. | ||
| 33 | void Update(bool enabled) { | ||
| 34 | if (enabled) { | ||
| 35 | Enable(); | ||
| 36 | } else { | ||
| 37 | Disable(); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Resets the stream to zero. It doesn't disable the query after resetting. | ||
| 42 | void Reset() { | ||
| 43 | if (current) { | ||
| 44 | current->EndQuery(); | ||
| 45 | |||
| 46 | // Immediately start a new query to avoid disabling its state. | ||
| 47 | current = cache.Counter(nullptr, type); | ||
| 48 | } | ||
| 49 | last = nullptr; | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Returns the current counter slicing as needed. | ||
| 53 | std::shared_ptr<HostCounter> Current() { | ||
| 54 | if (!current) { | ||
| 55 | return nullptr; | ||
| 56 | } | ||
| 57 | current->EndQuery(); | ||
| 58 | last = std::move(current); | ||
| 59 | current = cache.Counter(last, type); | ||
| 60 | return last; | ||
| 61 | } | ||
| 62 | |||
| 63 | /// Returns true when the counter stream is enabled. | ||
| 64 | bool IsEnabled() const { | ||
| 65 | return current != nullptr; | ||
| 66 | } | ||
| 67 | |||
| 68 | private: | ||
| 69 | /// Enables the stream. | ||
| 70 | void Enable() { | ||
| 71 | if (current) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | current = cache.Counter(last, type); | ||
| 75 | } | ||
| 76 | |||
| 77 | // Disables the stream. | ||
| 78 | void Disable() { | ||
| 79 | if (current) { | ||
| 80 | current->EndQuery(); | ||
| 81 | } | ||
| 82 | last = std::exchange(current, nullptr); | ||
| 83 | } | ||
| 84 | |||
| 85 | QueryCache& cache; | ||
| 86 | const VideoCore::QueryType type; | ||
| 87 | |||
| 88 | std::shared_ptr<HostCounter> current; | ||
| 89 | std::shared_ptr<HostCounter> last; | ||
| 90 | }; | ||
| 91 | |||
| 92 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, | ||
| 93 | class QueryPool> | ||
| 94 | class QueryCacheBase { | ||
| 95 | public: | ||
| 96 | explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 97 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ | ||
| 98 | static_cast<QueryCache&>(*this), | ||
| 99 | VideoCore::QueryType::SamplesPassed}}} {} | ||
| 100 | |||
| 101 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 102 | std::unique_lock lock{mutex}; | ||
| 103 | FlushAndRemoveRegion(addr, size); | ||
| 104 | } | ||
| 105 | |||
| 106 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 107 | std::unique_lock lock{mutex}; | ||
| 108 | FlushAndRemoveRegion(addr, size); | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * Records a query in GPU mapped memory, potentially marked with a timestamp. | ||
| 113 | * @param gpu_addr GPU address to flush to when the mapped memory is read. | ||
| 114 | * @param type Query type, e.g. SamplesPassed. | ||
| 115 | * @param timestamp Timestamp, when empty the flushed query is assumed to be short. | ||
| 116 | */ | ||
| 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | ||
| 118 | std::unique_lock lock{mutex}; | ||
| 119 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 120 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 121 | |||
| 122 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||
| 123 | if (!query) { | ||
| 124 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 125 | ASSERT_OR_EXECUTE(cpu_addr, return;); | ||
| 126 | |||
| 127 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | ||
| 128 | } | ||
| 129 | |||
| 130 | query->BindCounter(Stream(type).Current(), timestamp); | ||
| 131 | } | ||
| 132 | |||
| 133 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | ||
| 134 | void UpdateCounters() { | ||
| 135 | std::unique_lock lock{mutex}; | ||
| 136 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 137 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | ||
| 138 | } | ||
| 139 | |||
| 140 | /// Resets a counter to zero. It doesn't disable the query after resetting. | ||
| 141 | void ResetCounter(VideoCore::QueryType type) { | ||
| 142 | std::unique_lock lock{mutex}; | ||
| 143 | Stream(type).Reset(); | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Disable all active streams. Expected to be called at the end of a command buffer. | ||
| 147 | void DisableStreams() { | ||
| 148 | std::unique_lock lock{mutex}; | ||
| 149 | for (auto& stream : streams) { | ||
| 150 | stream.Update(false); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | /// Returns a new host counter. | ||
| 155 | std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, | ||
| 156 | VideoCore::QueryType type) { | ||
| 157 | return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), | ||
| 158 | type); | ||
| 159 | } | ||
| 160 | |||
| 161 | /// Returns the counter stream of the specified type. | ||
| 162 | CounterStream& Stream(VideoCore::QueryType type) { | ||
| 163 | return streams[static_cast<std::size_t>(type)]; | ||
| 164 | } | ||
| 165 | |||
| 166 | /// Returns the counter stream of the specified type. | ||
| 167 | const CounterStream& Stream(VideoCore::QueryType type) const { | ||
| 168 | return streams[static_cast<std::size_t>(type)]; | ||
| 169 | } | ||
| 170 | |||
| 171 | protected: | ||
| 172 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | ||
| 173 | |||
| 174 | private: | ||
| 175 | /// Flushes a memory range to guest memory and removes it from the cache. | ||
| 176 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||
| 177 | const u64 addr_begin = static_cast<u64>(addr); | ||
| 178 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 179 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 180 | const u64 cache_begin = query.GetCacheAddr(); | ||
| 181 | const u64 cache_end = cache_begin + query.SizeInBytes(); | ||
| 182 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 183 | }; | ||
| 184 | |||
| 185 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 186 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 187 | const auto& it = cached_queries.find(page); | ||
| 188 | if (it == std::end(cached_queries)) { | ||
| 189 | continue; | ||
| 190 | } | ||
| 191 | auto& contents = it->second; | ||
| 192 | for (auto& query : contents) { | ||
| 193 | if (!in_range(query)) { | ||
| 194 | continue; | ||
| 195 | } | ||
| 196 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | ||
| 197 | query.Flush(); | ||
| 198 | } | ||
| 199 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 200 | std::end(contents)); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | ||
| 205 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | ||
| 206 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | ||
| 207 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | ||
| 208 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | ||
| 209 | host_ptr); | ||
| 210 | } | ||
| 211 | |||
| 212 | /// Tries to a get a cached query. Returns nullptr on failure. | ||
| 213 | CachedQuery* TryGet(CacheAddr addr) { | ||
| 214 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 215 | const auto it = cached_queries.find(page); | ||
| 216 | if (it == std::end(cached_queries)) { | ||
| 217 | return nullptr; | ||
| 218 | } | ||
| 219 | auto& contents = it->second; | ||
| 220 | const auto found = | ||
| 221 | std::find_if(std::begin(contents), std::end(contents), | ||
| 222 | [addr](auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 223 | return found != std::end(contents) ? &*found : nullptr; | ||
| 224 | } | ||
| 225 | |||
| 226 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 227 | static constexpr unsigned PAGE_SHIFT = 12; | ||
| 228 | |||
| 229 | Core::System& system; | ||
| 230 | VideoCore::RasterizerInterface& rasterizer; | ||
| 231 | |||
| 232 | std::recursive_mutex mutex; | ||
| 233 | |||
| 234 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 235 | |||
| 236 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||
| 237 | }; | ||
| 238 | |||
| 239 | template <class QueryCache, class HostCounter> | ||
| 240 | class HostCounterBase { | ||
| 241 | public: | ||
| 242 | explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) | ||
| 243 | : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { | ||
| 244 | // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. | ||
| 245 | constexpr u64 depth_threshold = 96; | ||
| 246 | if (depth > depth_threshold) { | ||
| 247 | depth = 0; | ||
| 248 | base_result = dependency->Query(); | ||
| 249 | dependency = nullptr; | ||
| 250 | } | ||
| 251 | } | ||
| 252 | virtual ~HostCounterBase() = default; | ||
| 253 | |||
| 254 | /// Returns the current value of the query. | ||
| 255 | u64 Query() { | ||
| 256 | if (result) { | ||
| 257 | return *result; | ||
| 258 | } | ||
| 259 | |||
| 260 | u64 value = BlockingQuery() + base_result; | ||
| 261 | if (dependency) { | ||
| 262 | value += dependency->Query(); | ||
| 263 | dependency = nullptr; | ||
| 264 | } | ||
| 265 | |||
| 266 | result = value; | ||
| 267 | return *result; | ||
| 268 | } | ||
| 269 | |||
| 270 | /// Returns true when flushing this query will potentially wait. | ||
| 271 | bool WaitPending() const noexcept { | ||
| 272 | return result.has_value(); | ||
| 273 | } | ||
| 274 | |||
| 275 | u64 Depth() const noexcept { | ||
| 276 | return depth; | ||
| 277 | } | ||
| 278 | |||
| 279 | protected: | ||
| 280 | /// Returns the value of query from the backend API blocking as needed. | ||
| 281 | virtual u64 BlockingQuery() const = 0; | ||
| 282 | |||
| 283 | private: | ||
| 284 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | ||
| 285 | std::optional<u64> result; ///< Filled with the already returned value. | ||
| 286 | u64 depth; ///< Number of nested dependencies. | ||
| 287 | u64 base_result = 0; ///< Equivalent to nested dependencies value. | ||
| 288 | }; | ||
| 289 | |||
| 290 | template <class HostCounter> | ||
| 291 | class CachedQueryBase { | ||
| 292 | public: | ||
| 293 | explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) | ||
| 294 | : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | ||
| 295 | virtual ~CachedQueryBase() = default; | ||
| 296 | |||
| 297 | CachedQueryBase(CachedQueryBase&&) noexcept = default; | ||
| 298 | CachedQueryBase(const CachedQueryBase&) = delete; | ||
| 299 | |||
| 300 | CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default; | ||
| 301 | CachedQueryBase& operator=(const CachedQueryBase&) = delete; | ||
| 302 | |||
| 303 | /// Flushes the query to guest memory. | ||
| 304 | virtual void Flush() { | ||
| 305 | // When counter is nullptr it means that it's just been reseted. We are supposed to write a | ||
| 306 | // zero in these cases. | ||
| 307 | const u64 value = counter ? counter->Query() : 0; | ||
| 308 | std::memcpy(host_ptr, &value, sizeof(u64)); | ||
| 309 | |||
| 310 | if (timestamp) { | ||
| 311 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 312 | } | ||
| 313 | } | ||
| 314 | |||
| 315 | /// Binds a counter to this query. | ||
| 316 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | ||
| 317 | if (counter) { | ||
| 318 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 319 | // To avoid losing the data forever, flush here. | ||
| 320 | Flush(); | ||
| 321 | } | ||
| 322 | counter = std::move(counter_); | ||
| 323 | timestamp = timestamp_; | ||
| 324 | } | ||
| 325 | |||
| 326 | VAddr CpuAddr() const noexcept { | ||
| 327 | return cpu_addr; | ||
| 328 | } | ||
| 329 | |||
| 330 | CacheAddr GetCacheAddr() const noexcept { | ||
| 331 | return ToCacheAddr(host_ptr); | ||
| 332 | } | ||
| 333 | |||
| 334 | u64 SizeInBytes() const noexcept { | ||
| 335 | return SizeInBytes(timestamp.has_value()); | ||
| 336 | } | ||
| 337 | |||
| 338 | static constexpr u64 SizeInBytes(bool with_timestamp) noexcept { | ||
| 339 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 340 | } | ||
| 341 | |||
| 342 | protected: | ||
| 343 | /// Returns true when querying the counter may potentially block. | ||
| 344 | bool WaitPending() const noexcept { | ||
| 345 | return counter && counter->WaitPending(); | ||
| 346 | } | ||
| 347 | |||
| 348 | private: | ||
| 349 | static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. | ||
| 350 | static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. | ||
| 351 | static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. | ||
| 352 | |||
| 353 | VAddr cpu_addr; ///< Guest CPU address. | ||
| 354 | u8* host_ptr; ///< Writable host pointer. | ||
| 355 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | ||
| 356 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 357 | }; | ||
| 358 | |||
| 359 | } // namespace VideoCommon | ||