diff options
Diffstat (limited to 'src/video_core/query_cache.h')
| -rw-r--r-- | src/video_core/query_cache.h | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..4c9151ce8 --- /dev/null +++ b/src/video_core/query_cache.h | |||
| @@ -0,0 +1,323 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <cstring> | ||
| 10 | #include <iterator> | ||
| 11 | #include <memory> | ||
| 12 | #include <optional> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include "common/assert.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/engines/maxwell_3d.h" | ||
| 19 | #include "video_core/gpu.h" | ||
| 20 | #include "video_core/memory_manager.h" | ||
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | template <class QueryCache, class HostCounter> | ||
| 26 | class CounterStreamBase { | ||
| 27 | public: | ||
| 28 | explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) | ||
| 29 | : cache{cache}, type{type} {} | ||
| 30 | |||
| 31 | /// Updates the state of the stream, enabling or disabling as needed. | ||
| 32 | void Update(bool enabled) { | ||
| 33 | if (enabled) { | ||
| 34 | Enable(); | ||
| 35 | } else { | ||
| 36 | Disable(); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | /// Resets the stream to zero. It doesn't disable the query after resetting. | ||
| 41 | void Reset() { | ||
| 42 | if (current) { | ||
| 43 | current->EndQuery(); | ||
| 44 | |||
| 45 | // Immediately start a new query to avoid disabling its state. | ||
| 46 | current = cache.Counter(nullptr, type); | ||
| 47 | } | ||
| 48 | last = nullptr; | ||
| 49 | } | ||
| 50 | |||
| 51 | /// Returns the current counter slicing as needed. | ||
| 52 | std::shared_ptr<HostCounter> Current() { | ||
| 53 | if (!current) { | ||
| 54 | return nullptr; | ||
| 55 | } | ||
| 56 | current->EndQuery(); | ||
| 57 | last = std::move(current); | ||
| 58 | current = cache.Counter(last, type); | ||
| 59 | return last; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Returns true when the counter stream is enabled. | ||
| 63 | bool IsEnabled() const { | ||
| 64 | return static_cast<bool>(current); | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | ||
| 68 | /// Enables the stream. | ||
| 69 | void Enable() { | ||
| 70 | if (current) { | ||
| 71 | return; | ||
| 72 | } | ||
| 73 | current = cache.Counter(last, type); | ||
| 74 | } | ||
| 75 | |||
| 76 | // Disables the stream. | ||
| 77 | void Disable() { | ||
| 78 | if (current) { | ||
| 79 | current->EndQuery(); | ||
| 80 | } | ||
| 81 | last = std::exchange(current, nullptr); | ||
| 82 | } | ||
| 83 | |||
| 84 | QueryCache& cache; | ||
| 85 | const VideoCore::QueryType type; | ||
| 86 | |||
| 87 | std::shared_ptr<HostCounter> current; | ||
| 88 | std::shared_ptr<HostCounter> last; | ||
| 89 | }; | ||
| 90 | |||
| 91 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | ||
| 92 | class QueryCacheBase { | ||
| 93 | public: | ||
| 94 | explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 95 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ | ||
| 96 | static_cast<QueryCache&>(*this), | ||
| 97 | VideoCore::QueryType::SamplesPassed}}} {} | ||
| 98 | |||
| 99 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 100 | FlushAndRemoveRegion(addr, size); | ||
| 101 | } | ||
| 102 | |||
| 103 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 104 | FlushAndRemoveRegion(addr, size); | ||
| 105 | } | ||
| 106 | |||
| 107 | /** | ||
| 108 | * Records a query in GPU mapped memory, potentially marked with a timestamp. | ||
| 109 | * @param gpu_addr GPU address to flush to when the mapped memory is read. | ||
| 110 | * @param type Query type, e.g. SamplesPassed. | ||
| 111 | * @param timestamp Timestamp, when empty the flushed query is assumed to be short. | ||
| 112 | */ | ||
| 113 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | ||
| 114 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 115 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 116 | |||
| 117 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||
| 118 | if (!query) { | ||
| 119 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 120 | ASSERT_OR_EXECUTE(cpu_addr, return;); | ||
| 121 | |||
| 122 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | ||
| 123 | } | ||
| 124 | |||
| 125 | query->BindCounter(Stream(type).Current(), timestamp); | ||
| 126 | } | ||
| 127 | |||
| 128 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | ||
| 129 | void UpdateCounters() { | ||
| 130 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 131 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | ||
| 132 | } | ||
| 133 | |||
| 134 | /// Resets a counter to zero. It doesn't disable the query after resetting. | ||
| 135 | void ResetCounter(VideoCore::QueryType type) { | ||
| 136 | Stream(type).Reset(); | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Returns a new host counter. | ||
| 140 | std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, | ||
| 141 | VideoCore::QueryType type) { | ||
| 142 | return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), | ||
| 143 | type); | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Returns the counter stream of the specified type. | ||
| 147 | CounterStream& Stream(VideoCore::QueryType type) { | ||
| 148 | return streams[static_cast<std::size_t>(type)]; | ||
| 149 | } | ||
| 150 | |||
| 151 | private: | ||
| 152 | /// Flushes a memory range to guest memory and removes it from the cache. | ||
| 153 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||
| 154 | const u64 addr_begin = static_cast<u64>(addr); | ||
| 155 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 156 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 157 | const u64 cache_begin = query.CacheAddr(); | ||
| 158 | const u64 cache_end = cache_begin + query.SizeInBytes(); | ||
| 159 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 160 | }; | ||
| 161 | |||
| 162 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 163 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 164 | const auto& it = cached_queries.find(page); | ||
| 165 | if (it == std::end(cached_queries)) { | ||
| 166 | continue; | ||
| 167 | } | ||
| 168 | auto& contents = it->second; | ||
| 169 | for (auto& query : contents) { | ||
| 170 | if (!in_range(query)) { | ||
| 171 | continue; | ||
| 172 | } | ||
| 173 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | ||
| 174 | query.Flush(); | ||
| 175 | } | ||
| 176 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 177 | std::end(contents)); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | ||
| 182 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | ||
| 183 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | ||
| 184 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | ||
| 185 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | ||
| 186 | host_ptr); | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Tries to a get a cached query. Returns nullptr on failure. | ||
| 190 | CachedQuery* TryGet(CacheAddr addr) { | ||
| 191 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 192 | const auto it = cached_queries.find(page); | ||
| 193 | if (it == std::end(cached_queries)) { | ||
| 194 | return nullptr; | ||
| 195 | } | ||
| 196 | auto& contents = it->second; | ||
| 197 | const auto found = std::find_if(std::begin(contents), std::end(contents), | ||
| 198 | [addr](auto& query) { return query.CacheAddr() == addr; }); | ||
| 199 | return found != std::end(contents) ? &*found : nullptr; | ||
| 200 | } | ||
| 201 | |||
| 202 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 203 | static constexpr int PAGE_SHIFT = 12; | ||
| 204 | |||
| 205 | Core::System& system; | ||
| 206 | VideoCore::RasterizerInterface& rasterizer; | ||
| 207 | |||
| 208 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 209 | |||
| 210 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||
| 211 | }; | ||
| 212 | |||
/**
 * Base class for counters living on the host API.
 *
 * A counter optionally depends on another counter. Its value is the backend result of this
 * counter plus the value of the whole dependency chain. The sum is computed once and cached.
 *
 * @tparam QueryCache  Cache type (unused by the base class itself).
 * @tparam HostCounter Concrete counter type, used to type the dependency.
 */
template <class QueryCache, class HostCounter>
class HostCounterBase {
public:
    /// @param dependency Counter whose value is added to this one, or nullptr for none.
    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
        : dependency{std::move(dependency)} {}

    /**
     * Returns the current value of the query.
     * The first call asks the backend through BlockingQuery() and resolves the dependency; later
     * calls return the cached value.
     */
    u64 Query() {
        if (result) {
            return *result;
        }

        u64 value = BlockingQuery();
        if (dependency) {
            value += dependency->Query();
            // The dependency has been folded into the cached result and is never read again.
            // Release it so resolved slices do not keep the rest of the chain alive.
            dependency.reset();
        }

        result = value;
        return value;
    }

    /// Returns true when flushing this query will potentially wait.
    bool WaitPending() const noexcept {
        // Query() only reaches BlockingQuery() while no result has been cached yet.
        return !result.has_value();
    }

protected:
    /// Returns the value of query from the backend API blocking as needed.
    virtual u64 BlockingQuery() const = 0;

private:
    std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
    std::optional<u64> result;               ///< Filled with the already returned value.
};
| 246 | |||
| 247 | template <class HostCounter> | ||
| 248 | class CachedQueryBase { | ||
| 249 | public: | ||
| 250 | explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) | ||
| 251 | : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | ||
| 252 | |||
| 253 | CachedQueryBase(CachedQueryBase&& rhs) noexcept | ||
| 254 | : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, | ||
| 255 | timestamp{rhs.timestamp} {} | ||
| 256 | |||
| 257 | CachedQueryBase(const CachedQueryBase&) = delete; | ||
| 258 | |||
| 259 | CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { | ||
| 260 | cpu_addr = rhs.cpu_addr; | ||
| 261 | host_ptr = rhs.host_ptr; | ||
| 262 | counter = std::move(rhs.counter); | ||
| 263 | timestamp = rhs.timestamp; | ||
| 264 | return *this; | ||
| 265 | } | ||
| 266 | |||
| 267 | /// Flushes the query to guest memory. | ||
| 268 | virtual void Flush() { | ||
| 269 | // When counter is nullptr it means that it's just been reseted. We are supposed to write a | ||
| 270 | // zero in these cases. | ||
| 271 | const u64 value = counter ? counter->Query() : 0; | ||
| 272 | std::memcpy(host_ptr, &value, sizeof(u64)); | ||
| 273 | |||
| 274 | if (timestamp) { | ||
| 275 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 276 | } | ||
| 277 | } | ||
| 278 | |||
| 279 | /// Binds a counter to this query. | ||
| 280 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | ||
| 281 | if (counter) { | ||
| 282 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 283 | // To avoid losing the data forever, flush here. | ||
| 284 | Flush(); | ||
| 285 | } | ||
| 286 | counter = std::move(counter_); | ||
| 287 | timestamp = timestamp_; | ||
| 288 | } | ||
| 289 | |||
| 290 | VAddr CpuAddr() const noexcept { | ||
| 291 | return cpu_addr; | ||
| 292 | } | ||
| 293 | |||
| 294 | CacheAddr CacheAddr() const noexcept { | ||
| 295 | return ToCacheAddr(host_ptr); | ||
| 296 | } | ||
| 297 | |||
| 298 | u64 SizeInBytes() const noexcept { | ||
| 299 | return SizeInBytes(timestamp.has_value()); | ||
| 300 | } | ||
| 301 | |||
| 302 | static u64 SizeInBytes(bool with_timestamp) { | ||
| 303 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 304 | } | ||
| 305 | |||
| 306 | protected: | ||
| 307 | /// Returns true when querying the counter may potentially block. | ||
| 308 | bool WaitPending() const noexcept { | ||
| 309 | return counter && counter->WaitPending(); | ||
| 310 | } | ||
| 311 | |||
| 312 | private: | ||
| 313 | static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. | ||
| 314 | static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. | ||
| 315 | static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. | ||
| 316 | |||
| 317 | VAddr cpu_addr; ///< Guest CPU address. | ||
| 318 | u8* host_ptr; ///< Writable host pointer. | ||
| 319 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | ||
| 320 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 321 | }; | ||
| 322 | |||
| 323 | } // namespace VideoCommon | ||