diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/query_cache.h | 323 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 287 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.h | 122 |
4 files changed, 394 insertions, 339 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3208f4993..bb5895e99 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -37,6 +37,7 @@ add_library(video_core STATIC | |||
| 37 | memory_manager.h | 37 | memory_manager.h |
| 38 | morton.cpp | 38 | morton.cpp |
| 39 | morton.h | 39 | morton.h |
| 40 | query_cache.h | ||
| 40 | rasterizer_accelerated.cpp | 41 | rasterizer_accelerated.cpp |
| 41 | rasterizer_accelerated.h | 42 | rasterizer_accelerated.h |
| 42 | rasterizer_cache.cpp | 43 | rasterizer_cache.cpp |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h new file mode 100644 index 000000000..4c9151ce8 --- /dev/null +++ b/src/video_core/query_cache.h | |||
| @@ -0,0 +1,323 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>
| 15 | |||
| 16 | #include "common/assert.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/engines/maxwell_3d.h" | ||
| 19 | #include "video_core/gpu.h" | ||
| 20 | #include "video_core/memory_manager.h" | ||
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | |||
| 23 | namespace VideoCommon { | ||
| 24 | |||
| 25 | template <class QueryCache, class HostCounter> | ||
| 26 | class CounterStreamBase { | ||
| 27 | public: | ||
| 28 | explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type) | ||
| 29 | : cache{cache}, type{type} {} | ||
| 30 | |||
| 31 | /// Updates the state of the stream, enabling or disabling as needed. | ||
| 32 | void Update(bool enabled) { | ||
| 33 | if (enabled) { | ||
| 34 | Enable(); | ||
| 35 | } else { | ||
| 36 | Disable(); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | /// Resets the stream to zero. It doesn't disable the query after resetting. | ||
| 41 | void Reset() { | ||
| 42 | if (current) { | ||
| 43 | current->EndQuery(); | ||
| 44 | |||
| 45 | // Immediately start a new query to avoid disabling its state. | ||
| 46 | current = cache.Counter(nullptr, type); | ||
| 47 | } | ||
| 48 | last = nullptr; | ||
| 49 | } | ||
| 50 | |||
| 51 | /// Returns the current counter slicing as needed. | ||
| 52 | std::shared_ptr<HostCounter> Current() { | ||
| 53 | if (!current) { | ||
| 54 | return nullptr; | ||
| 55 | } | ||
| 56 | current->EndQuery(); | ||
| 57 | last = std::move(current); | ||
| 58 | current = cache.Counter(last, type); | ||
| 59 | return last; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Returns true when the counter stream is enabled. | ||
| 63 | bool IsEnabled() const { | ||
| 64 | return static_cast<bool>(current); | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | ||
| 68 | /// Enables the stream. | ||
| 69 | void Enable() { | ||
| 70 | if (current) { | ||
| 71 | return; | ||
| 72 | } | ||
| 73 | current = cache.Counter(last, type); | ||
| 74 | } | ||
| 75 | |||
| 76 | // Disables the stream. | ||
| 77 | void Disable() { | ||
| 78 | if (current) { | ||
| 79 | current->EndQuery(); | ||
| 80 | } | ||
| 81 | last = std::exchange(current, nullptr); | ||
| 82 | } | ||
| 83 | |||
| 84 | QueryCache& cache; | ||
| 85 | const VideoCore::QueryType type; | ||
| 86 | |||
| 87 | std::shared_ptr<HostCounter> current; | ||
| 88 | std::shared_ptr<HostCounter> last; | ||
| 89 | }; | ||
| 90 | |||
| 91 | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | ||
| 92 | class QueryCacheBase { | ||
| 93 | public: | ||
| 94 | explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 95 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ | ||
| 96 | static_cast<QueryCache&>(*this), | ||
| 97 | VideoCore::QueryType::SamplesPassed}}} {} | ||
| 98 | |||
| 99 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 100 | FlushAndRemoveRegion(addr, size); | ||
| 101 | } | ||
| 102 | |||
| 103 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 104 | FlushAndRemoveRegion(addr, size); | ||
| 105 | } | ||
| 106 | |||
| 107 | /** | ||
| 108 | * Records a query in GPU mapped memory, potentially marked with a timestamp. | ||
| 109 | * @param gpu_addr GPU address to flush to when the mapped memory is read. | ||
| 110 | * @param type Query type, e.g. SamplesPassed. | ||
| 111 | * @param timestamp Timestamp, when empty the flushed query is assumed to be short. | ||
| 112 | */ | ||
| 113 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | ||
| 114 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 115 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 116 | |||
| 117 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||
| 118 | if (!query) { | ||
| 119 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 120 | ASSERT_OR_EXECUTE(cpu_addr, return;); | ||
| 121 | |||
| 122 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | ||
| 123 | } | ||
| 124 | |||
| 125 | query->BindCounter(Stream(type).Current(), timestamp); | ||
| 126 | } | ||
| 127 | |||
| 128 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | ||
| 129 | void UpdateCounters() { | ||
| 130 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 131 | Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | ||
| 132 | } | ||
| 133 | |||
| 134 | /// Resets a counter to zero. It doesn't disable the query after resetting. | ||
| 135 | void ResetCounter(VideoCore::QueryType type) { | ||
| 136 | Stream(type).Reset(); | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Returns a new host counter. | ||
| 140 | std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, | ||
| 141 | VideoCore::QueryType type) { | ||
| 142 | return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency), | ||
| 143 | type); | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Returns the counter stream of the specified type. | ||
| 147 | CounterStream& Stream(VideoCore::QueryType type) { | ||
| 148 | return streams[static_cast<std::size_t>(type)]; | ||
| 149 | } | ||
| 150 | |||
| 151 | private: | ||
| 152 | /// Flushes a memory range to guest memory and removes it from the cache. | ||
| 153 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||
| 154 | const u64 addr_begin = static_cast<u64>(addr); | ||
| 155 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 156 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 157 | const u64 cache_begin = query.CacheAddr(); | ||
| 158 | const u64 cache_end = cache_begin + query.SizeInBytes(); | ||
| 159 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 160 | }; | ||
| 161 | |||
| 162 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 163 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 164 | const auto& it = cached_queries.find(page); | ||
| 165 | if (it == std::end(cached_queries)) { | ||
| 166 | continue; | ||
| 167 | } | ||
| 168 | auto& contents = it->second; | ||
| 169 | for (auto& query : contents) { | ||
| 170 | if (!in_range(query)) { | ||
| 171 | continue; | ||
| 172 | } | ||
| 173 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | ||
| 174 | query.Flush(); | ||
| 175 | } | ||
| 176 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 177 | std::end(contents)); | ||
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | ||
| 182 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | ||
| 183 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | ||
| 184 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | ||
| 185 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | ||
| 186 | host_ptr); | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Tries to a get a cached query. Returns nullptr on failure. | ||
| 190 | CachedQuery* TryGet(CacheAddr addr) { | ||
| 191 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 192 | const auto it = cached_queries.find(page); | ||
| 193 | if (it == std::end(cached_queries)) { | ||
| 194 | return nullptr; | ||
| 195 | } | ||
| 196 | auto& contents = it->second; | ||
| 197 | const auto found = std::find_if(std::begin(contents), std::end(contents), | ||
| 198 | [addr](auto& query) { return query.CacheAddr() == addr; }); | ||
| 199 | return found != std::end(contents) ? &*found : nullptr; | ||
| 200 | } | ||
| 201 | |||
| 202 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 203 | static constexpr int PAGE_SHIFT = 12; | ||
| 204 | |||
| 205 | Core::System& system; | ||
| 206 | VideoCore::RasterizerInterface& rasterizer; | ||
| 207 | |||
| 208 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 209 | |||
| 210 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||
| 211 | }; | ||
| 212 | |||
| 213 | template <class QueryCache, class HostCounter> | ||
| 214 | class HostCounterBase { | ||
| 215 | public: | ||
| 216 | explicit HostCounterBase(std::shared_ptr<HostCounter> dependency) | ||
| 217 | : dependency{std::move(dependency)} {} | ||
| 218 | |||
| 219 | /// Returns the current value of the query. | ||
| 220 | u64 Query() { | ||
| 221 | if (result) { | ||
| 222 | return *result; | ||
| 223 | } | ||
| 224 | |||
| 225 | u64 value = BlockingQuery(); | ||
| 226 | if (dependency) { | ||
| 227 | value += dependency->Query(); | ||
| 228 | } | ||
| 229 | |||
| 230 | return *(result = value); | ||
| 231 | } | ||
| 232 | |||
| 233 | /// Returns true when flushing this query will potentially wait. | ||
| 234 | bool WaitPending() const noexcept { | ||
| 235 | return result.has_value(); | ||
| 236 | } | ||
| 237 | |||
| 238 | protected: | ||
| 239 | /// Returns the value of query from the backend API blocking as needed. | ||
| 240 | virtual u64 BlockingQuery() const = 0; | ||
| 241 | |||
| 242 | private: | ||
| 243 | std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | ||
| 244 | std::optional<u64> result; ///< Filled with the already returned value. | ||
| 245 | }; | ||
| 246 | |||
| 247 | template <class HostCounter> | ||
| 248 | class CachedQueryBase { | ||
| 249 | public: | ||
| 250 | explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr) | ||
| 251 | : cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | ||
| 252 | |||
| 253 | CachedQueryBase(CachedQueryBase&& rhs) noexcept | ||
| 254 | : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)}, | ||
| 255 | timestamp{rhs.timestamp} {} | ||
| 256 | |||
| 257 | CachedQueryBase(const CachedQueryBase&) = delete; | ||
| 258 | |||
| 259 | CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept { | ||
| 260 | cpu_addr = rhs.cpu_addr; | ||
| 261 | host_ptr = rhs.host_ptr; | ||
| 262 | counter = std::move(rhs.counter); | ||
| 263 | timestamp = rhs.timestamp; | ||
| 264 | return *this; | ||
| 265 | } | ||
| 266 | |||
| 267 | /// Flushes the query to guest memory. | ||
| 268 | virtual void Flush() { | ||
| 269 | // When counter is nullptr it means that it's just been reseted. We are supposed to write a | ||
| 270 | // zero in these cases. | ||
| 271 | const u64 value = counter ? counter->Query() : 0; | ||
| 272 | std::memcpy(host_ptr, &value, sizeof(u64)); | ||
| 273 | |||
| 274 | if (timestamp) { | ||
| 275 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 276 | } | ||
| 277 | } | ||
| 278 | |||
| 279 | /// Binds a counter to this query. | ||
| 280 | void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | ||
| 281 | if (counter) { | ||
| 282 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 283 | // To avoid losing the data forever, flush here. | ||
| 284 | Flush(); | ||
| 285 | } | ||
| 286 | counter = std::move(counter_); | ||
| 287 | timestamp = timestamp_; | ||
| 288 | } | ||
| 289 | |||
| 290 | VAddr CpuAddr() const noexcept { | ||
| 291 | return cpu_addr; | ||
| 292 | } | ||
| 293 | |||
| 294 | CacheAddr CacheAddr() const noexcept { | ||
| 295 | return ToCacheAddr(host_ptr); | ||
| 296 | } | ||
| 297 | |||
| 298 | u64 SizeInBytes() const noexcept { | ||
| 299 | return SizeInBytes(timestamp.has_value()); | ||
| 300 | } | ||
| 301 | |||
| 302 | static u64 SizeInBytes(bool with_timestamp) { | ||
| 303 | return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 304 | } | ||
| 305 | |||
| 306 | protected: | ||
| 307 | /// Returns true when querying the counter may potentially block. | ||
| 308 | bool WaitPending() const noexcept { | ||
| 309 | return counter && counter->WaitPending(); | ||
| 310 | } | ||
| 311 | |||
| 312 | private: | ||
| 313 | static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp. | ||
| 314 | static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp. | ||
| 315 | static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query. | ||
| 316 | |||
| 317 | VAddr cpu_addr; ///< Guest CPU address. | ||
| 318 | u8* host_ptr; ///< Writable host pointer. | ||
| 319 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | ||
| 320 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 321 | }; | ||
| 322 | |||
| 323 | } // namespace VideoCommon | ||
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 74cb73209..7d5a044c7 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -20,211 +20,49 @@ | |||
| 20 | 20 | ||
| 21 | namespace OpenGL { | 21 | namespace OpenGL { |
| 22 | 22 | ||
| 23 | using VideoCore::QueryType; | ||
| 24 | |||
| 25 | namespace { | 23 | namespace { |
| 26 | 24 | ||
| 27 | constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 28 | constexpr int PAGE_SHIFT = 12; | ||
| 29 | |||
| 30 | constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp | ||
| 31 | constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp | ||
| 32 | constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8; | ||
| 33 | |||
| 34 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; | 25 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; |
| 35 | 26 | ||
| 36 | constexpr GLenum GetTarget(QueryType type) { | 27 | constexpr GLenum GetTarget(VideoCore::QueryType type) { |
| 37 | return QueryTargets[static_cast<std::size_t>(type)]; | 28 | return QueryTargets[static_cast<std::size_t>(type)]; |
| 38 | } | 29 | } |
| 39 | 30 | ||
| 40 | } // Anonymous namespace | 31 | } // Anonymous namespace |
| 41 | 32 | ||
| 42 | CounterStream::CounterStream(QueryCache& cache, QueryType type) | 33 | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) |
| 43 | : cache{cache}, type{type}, target{GetTarget(type)} {} | 34 | : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, |
| 44 | 35 | HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>( | |
| 45 | CounterStream::~CounterStream() = default; | 36 | gl_rasterizer)}, |
| 46 | 37 | gl_rasterizer{gl_rasterizer} {} | |
| 47 | void CounterStream::Update(bool enabled, bool any_command_queued) { | ||
| 48 | if (enabled) { | ||
| 49 | Enable(); | ||
| 50 | } else { | ||
| 51 | Disable(any_command_queued); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | void CounterStream::Reset(bool any_command_queued) { | ||
| 56 | if (current) { | ||
| 57 | EndQuery(any_command_queued); | ||
| 58 | |||
| 59 | // Immediately start a new query to avoid disabling its state. | ||
| 60 | current = cache.GetHostCounter(nullptr, type); | ||
| 61 | } | ||
| 62 | last = nullptr; | ||
| 63 | } | ||
| 64 | |||
| 65 | std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) { | ||
| 66 | if (!current) { | ||
| 67 | return nullptr; | ||
| 68 | } | ||
| 69 | EndQuery(any_command_queued); | ||
| 70 | last = std::move(current); | ||
| 71 | current = cache.GetHostCounter(last, type); | ||
| 72 | return last; | ||
| 73 | } | ||
| 74 | |||
| 75 | void CounterStream::Enable() { | ||
| 76 | if (current) { | ||
| 77 | return; | ||
| 78 | } | ||
| 79 | current = cache.GetHostCounter(last, type); | ||
| 80 | } | ||
| 81 | |||
| 82 | void CounterStream::Disable(bool any_command_queued) { | ||
| 83 | if (current) { | ||
| 84 | EndQuery(any_command_queued); | ||
| 85 | } | ||
| 86 | last = std::exchange(current, nullptr); | ||
| 87 | } | ||
| 88 | |||
| 89 | void CounterStream::EndQuery(bool any_command_queued) { | ||
| 90 | if (!any_command_queued) { | ||
| 91 | // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not | ||
| 92 | // having any of these causes a lock. glFlush is considered a command, so we can safely wait | ||
| 93 | // for this. Insert to the OpenGL command stream a flush. | ||
| 94 | glFlush(); | ||
| 95 | } | ||
| 96 | glEndQuery(target); | ||
| 97 | } | ||
| 98 | |||
| 99 | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) | ||
| 100 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this, | ||
| 101 | QueryType::SamplesPassed}}} {} | ||
| 102 | 38 | ||
| 103 | QueryCache::~QueryCache() = default; | 39 | QueryCache::~QueryCache() = default; |
| 104 | 40 | ||
| 105 | void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { | 41 | OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { |
| 106 | const u64 addr_begin = static_cast<u64>(addr); | 42 | auto& reserve = queries_reserve[static_cast<std::size_t>(type)]; |
| 107 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 108 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 109 | const u64 cache_begin = query.GetCacheAddr(); | ||
| 110 | const u64 cache_end = cache_begin + query.GetSizeInBytes(); | ||
| 111 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 112 | }; | ||
| 113 | |||
| 114 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 115 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 116 | const auto& it = cached_queries.find(page); | ||
| 117 | if (it == std::end(cached_queries)) { | ||
| 118 | continue; | ||
| 119 | } | ||
| 120 | auto& contents = it->second; | ||
| 121 | for (auto& query : contents) { | ||
| 122 | if (!in_range(query)) { | ||
| 123 | continue; | ||
| 124 | } | ||
| 125 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1); | ||
| 126 | Flush(query); | ||
| 127 | } | ||
| 128 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 129 | std::end(contents)); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 134 | // We can handle flushes in the same way as invalidations. | ||
| 135 | InvalidateRegion(addr, size); | ||
| 136 | } | ||
| 137 | |||
| 138 | void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) { | ||
| 139 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 140 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 141 | |||
| 142 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | ||
| 143 | if (!query) { | ||
| 144 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 145 | ASSERT_OR_EXECUTE(cpu_addr, return;); | ||
| 146 | |||
| 147 | query = &Register(CachedQuery(type, *cpu_addr, host_ptr)); | ||
| 148 | } | ||
| 149 | |||
| 150 | query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp); | ||
| 151 | } | ||
| 152 | |||
| 153 | void QueryCache::UpdateCounters() { | ||
| 154 | auto& samples_passed = GetStream(QueryType::SamplesPassed); | ||
| 155 | |||
| 156 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 157 | samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued()); | ||
| 158 | } | ||
| 159 | |||
| 160 | void QueryCache::ResetCounter(QueryType type) { | ||
| 161 | GetStream(type).Reset(rasterizer.AnyCommandQueued()); | ||
| 162 | } | ||
| 163 | |||
| 164 | void QueryCache::Reserve(QueryType type, OGLQuery&& query) { | ||
| 165 | reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query)); | ||
| 166 | } | ||
| 167 | |||
| 168 | std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency, | ||
| 169 | QueryType type) { | ||
| 170 | auto& reserve = reserved_queries[static_cast<std::size_t>(type)]; | ||
| 171 | OGLQuery query; | 43 | OGLQuery query; |
| 172 | if (reserve.empty()) { | 44 | if (reserve.empty()) { |
| 173 | query.Create(GetTarget(type)); | 45 | query.Create(GetTarget(type)); |
| 174 | } else { | 46 | return query; |
| 175 | query = std::move(reserve.back()); | ||
| 176 | reserve.pop_back(); | ||
| 177 | } | 47 | } |
| 178 | 48 | ||
| 179 | return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query)); | 49 | query = std::move(reserve.back()); |
| 50 | reserve.pop_back(); | ||
| 51 | return query; | ||
| 180 | } | 52 | } |
| 181 | 53 | ||
| 182 | CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { | 54 | void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { |
| 183 | const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT; | 55 | queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query)); |
| 184 | auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query)); | ||
| 185 | rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1); | ||
| 186 | return stored_ref; | ||
| 187 | } | ||
| 188 | |||
| 189 | CachedQuery* QueryCache::TryGet(CacheAddr addr) { | ||
| 190 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 191 | const auto it = cached_queries.find(page); | ||
| 192 | if (it == std::end(cached_queries)) { | ||
| 193 | return nullptr; | ||
| 194 | } | ||
| 195 | auto& contents = it->second; | ||
| 196 | const auto found = | ||
| 197 | std::find_if(std::begin(contents), std::end(contents), | ||
| 198 | [addr](const auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 199 | return found != std::end(contents) ? &*found : nullptr; | ||
| 200 | } | ||
| 201 | |||
| 202 | void QueryCache::Flush(CachedQuery& cached_query) { | ||
| 203 | auto& stream = GetStream(cached_query.GetType()); | ||
| 204 | |||
| 205 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. | ||
| 206 | // To avoid this disable and re-enable keeping the dependency stream. | ||
| 207 | // But we only have to do this if we have pending waits to be done. | ||
| 208 | const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending(); | ||
| 209 | const bool any_command_queued = rasterizer.AnyCommandQueued(); | ||
| 210 | if (slice_counter) { | ||
| 211 | stream.Update(false, any_command_queued); | ||
| 212 | } | ||
| 213 | |||
| 214 | cached_query.Flush(); | ||
| 215 | |||
| 216 | if (slice_counter) { | ||
| 217 | stream.Update(true, any_command_queued); | ||
| 218 | } | ||
| 219 | } | 56 | } |
| 220 | 57 | ||
| 221 | CounterStream& QueryCache::GetStream(QueryType type) { | 58 | bool QueryCache::AnyCommandQueued() const noexcept { |
| 222 | return streams[static_cast<std::size_t>(type)]; | 59 | return gl_rasterizer.AnyCommandQueued(); |
| 223 | } | 60 | } |
| 224 | 61 | ||
| 225 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, | 62 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, |
| 226 | OGLQuery&& query_) | 63 | VideoCore::QueryType type) |
| 227 | : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { | 64 | : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache}, |
| 65 | type{type}, query{cache.AllocateQuery(type)} { | ||
| 228 | glBeginQuery(GetTarget(type), query.handle); | 66 | glBeginQuery(GetTarget(type), query.handle); |
| 229 | } | 67 | } |
| 230 | 68 | ||
| @@ -232,81 +70,50 @@ HostCounter::~HostCounter() { | |||
| 232 | cache.Reserve(type, std::move(query)); | 70 | cache.Reserve(type, std::move(query)); |
| 233 | } | 71 | } |
| 234 | 72 | ||
| 235 | u64 HostCounter::Query() { | 73 | void HostCounter::EndQuery() { |
| 236 | if (result) { | 74 | if (!cache.AnyCommandQueued()) { |
| 237 | return *result; | 75 | // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not |
| 238 | } | 76 | // having any of these causes a lock. glFlush is considered a command, so we can safely wait |
| 239 | 77 | // for this. Insert to the OpenGL command stream a flush. | |
| 240 | u64 value; | 78 | glFlush(); |
| 241 | glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); | ||
| 242 | if (dependency) { | ||
| 243 | value += dependency->Query(); | ||
| 244 | } | 79 | } |
| 245 | 80 | glEndQuery(GetTarget(type)); | |
| 246 | return *(result = value); | ||
| 247 | } | 81 | } |
| 248 | 82 | ||
| 249 | bool HostCounter::WaitPending() const noexcept { | 83 | u64 HostCounter::BlockingQuery() const { |
| 250 | return result.has_value(); | 84 | GLint64 value; |
| 85 | glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value); | ||
| 86 | return static_cast<u64>(value); | ||
| 251 | } | 87 | } |
| 252 | 88 | ||
| 253 | CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) | 89 | CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) |
| 254 | : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | 90 | : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} |
| 255 | 91 | ||
| 256 | CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept | 92 | CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept |
| 257 | : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, | 93 | : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} |
| 258 | counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {} | ||
| 259 | |||
| 260 | CachedQuery::~CachedQuery() = default; | ||
| 261 | 94 | ||
| 262 | CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { | 95 | CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { |
| 96 | VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); | ||
| 97 | cache = rhs.cache; | ||
| 263 | type = rhs.type; | 98 | type = rhs.type; |
| 264 | cpu_addr = rhs.cpu_addr; | ||
| 265 | host_ptr = rhs.host_ptr; | ||
| 266 | counter = std::move(rhs.counter); | ||
| 267 | timestamp = rhs.timestamp; | ||
| 268 | return *this; | 99 | return *this; |
| 269 | } | 100 | } |
| 270 | 101 | ||
| 271 | void CachedQuery::Flush() { | 102 | void CachedQuery::Flush() { |
| 272 | // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero | 103 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. |
| 273 | // in these cases. | 104 | // To avoid this disable and re-enable keeping the dependency stream. |
| 274 | const u64 value = counter ? counter->Query() : 0; | 105 | // But we only have to do this if we have pending waits to be done. |
| 275 | std::memcpy(host_ptr, &value, sizeof(u64)); | 106 | auto& stream = cache->Stream(type); |
| 276 | 107 | const bool slice_counter = WaitPending() && stream.IsEnabled(); | |
| 277 | if (timestamp) { | 108 | if (slice_counter) { |
| 278 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | 109 | stream.Update(false); |
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { | ||
| 283 | if (counter) { | ||
| 284 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 285 | // To avoid losing the data forever, flush here. | ||
| 286 | Flush(); | ||
| 287 | } | 110 | } |
| 288 | counter = std::move(counter_); | ||
| 289 | timestamp = timestamp_; | ||
| 290 | } | ||
| 291 | |||
| 292 | bool CachedQuery::WaitPending() const noexcept { | ||
| 293 | return counter && counter->WaitPending(); | ||
| 294 | } | ||
| 295 | 111 | ||
| 296 | QueryType CachedQuery::GetType() const noexcept { | 112 | VideoCommon::CachedQueryBase<HostCounter>::Flush(); |
| 297 | return type; | ||
| 298 | } | ||
| 299 | 113 | ||
| 300 | VAddr CachedQuery::GetCpuAddr() const noexcept { | 114 | if (slice_counter) { |
| 301 | return cpu_addr; | 115 | stream.Update(true); |
| 302 | } | 116 | } |
| 303 | |||
| 304 | CacheAddr CachedQuery::GetCacheAddr() const noexcept { | ||
| 305 | return ToCacheAddr(host_ptr); | ||
| 306 | } | ||
| 307 | |||
| 308 | u64 CachedQuery::GetSizeInBytes() const noexcept { | ||
| 309 | return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 310 | } | 117 | } |
| 311 | 118 | ||
| 312 | } // namespace OpenGL | 119 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index d9f22b44d..20d337f15 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <glad/glad.h> | 13 | #include <glad/glad.h> |
| 14 | 14 | ||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/query_cache.h" | ||
| 16 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 18 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | 19 | ||
| @@ -24,134 +25,57 @@ namespace OpenGL { | |||
| 24 | 25 | ||
| 25 | class CachedQuery; | 26 | class CachedQuery; |
| 26 | class HostCounter; | 27 | class HostCounter; |
| 27 | class RasterizerOpenGL; | ||
| 28 | class QueryCache; | 28 | class QueryCache; |
| 29 | class RasterizerOpenGL; | ||
| 29 | 30 | ||
| 30 | class CounterStream final { | 31 | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; |
| 31 | public: | ||
| 32 | explicit CounterStream(QueryCache& cache, VideoCore::QueryType type); | ||
| 33 | ~CounterStream(); | ||
| 34 | |||
| 35 | void Update(bool enabled, bool any_command_queued); | ||
| 36 | |||
| 37 | void Reset(bool any_command_queued); | ||
| 38 | |||
| 39 | std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued); | ||
| 40 | |||
| 41 | bool IsEnabled() const { | ||
| 42 | return current != nullptr; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | void Enable(); | ||
| 47 | |||
| 48 | void Disable(bool any_command_queued); | ||
| 49 | |||
| 50 | void EndQuery(bool any_command_queued); | ||
| 51 | 32 | ||
| 52 | QueryCache& cache; | 33 | class QueryCache final |
| 53 | 34 | : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { | |
| 54 | std::shared_ptr<HostCounter> current; | ||
| 55 | std::shared_ptr<HostCounter> last; | ||
| 56 | VideoCore::QueryType type; | ||
| 57 | GLenum target; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class QueryCache final { | ||
| 61 | public: | 35 | public: |
| 62 | explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); | 36 | explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); |
| 63 | ~QueryCache(); | 37 | ~QueryCache(); |
| 64 | 38 | ||
| 65 | void InvalidateRegion(CacheAddr addr, std::size_t size); | 39 | OGLQuery AllocateQuery(VideoCore::QueryType type); |
| 66 | |||
| 67 | void FlushRegion(CacheAddr addr, std::size_t size); | ||
| 68 | |||
| 69 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp); | ||
| 70 | |||
| 71 | void UpdateCounters(); | ||
| 72 | |||
| 73 | void ResetCounter(VideoCore::QueryType type); | ||
| 74 | 40 | ||
| 75 | void Reserve(VideoCore::QueryType type, OGLQuery&& query); | 41 | void Reserve(VideoCore::QueryType type, OGLQuery&& query); |
| 76 | 42 | ||
| 77 | std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, | 43 | bool AnyCommandQueued() const noexcept; |
| 78 | VideoCore::QueryType type); | ||
| 79 | 44 | ||
| 80 | private: | 45 | private: |
| 81 | CachedQuery& Register(CachedQuery&& cached_query); | 46 | RasterizerOpenGL& gl_rasterizer; |
| 82 | 47 | std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve; | |
| 83 | CachedQuery* TryGet(CacheAddr addr); | ||
| 84 | |||
| 85 | void Flush(CachedQuery& cached_query); | ||
| 86 | |||
| 87 | CounterStream& GetStream(VideoCore::QueryType type); | ||
| 88 | |||
| 89 | Core::System& system; | ||
| 90 | RasterizerOpenGL& rasterizer; | ||
| 91 | |||
| 92 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 93 | |||
| 94 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | ||
| 95 | std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries; | ||
| 96 | }; | 48 | }; |
| 97 | 49 | ||
| 98 | class HostCounter final { | 50 | class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { |
| 99 | public: | 51 | public: |
| 100 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, | 52 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, |
| 101 | VideoCore::QueryType type, OGLQuery&& query); | 53 | VideoCore::QueryType type); |
| 102 | ~HostCounter(); | 54 | ~HostCounter(); |
| 103 | 55 | ||
| 104 | /// Returns the current value of the query. | 56 | void EndQuery(); |
| 105 | u64 Query(); | ||
| 106 | |||
| 107 | /// Returns true when querying this counter will potentially wait for OpenGL. | ||
| 108 | bool WaitPending() const noexcept; | ||
| 109 | 57 | ||
| 110 | private: | 58 | private: |
| 59 | u64 BlockingQuery() const override; | ||
| 60 | |||
| 111 | QueryCache& cache; | 61 | QueryCache& cache; |
| 112 | VideoCore::QueryType type; | 62 | VideoCore::QueryType type; |
| 113 | 63 | OGLQuery query; | |
| 114 | std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one. | ||
| 115 | OGLQuery query; ///< OpenGL query. | ||
| 116 | std::optional<u64> result; ///< Added values of the counter. | ||
| 117 | }; | 64 | }; |
| 118 | 65 | ||
| 119 | class CachedQuery final { | 66 | class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { |
| 120 | public: | 67 | public: |
| 121 | explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); | 68 | explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, |
| 122 | CachedQuery(CachedQuery&&) noexcept; | 69 | u8* host_ptr); |
| 123 | CachedQuery(const CachedQuery&) = delete; | 70 | CachedQuery(CachedQuery&& rhs) noexcept; |
| 124 | ~CachedQuery(); | ||
| 125 | |||
| 126 | CachedQuery& operator=(CachedQuery&&) noexcept; | ||
| 127 | |||
| 128 | /// Writes the counter value to host memory. | ||
| 129 | void Flush(); | ||
| 130 | |||
| 131 | /// Updates the counter this cached query registered in guest memory will write when requested. | ||
| 132 | void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp); | ||
| 133 | 71 | ||
| 134 | /// Returns true when a flushing this query will potentially wait for OpenGL. | 72 | CachedQuery& operator=(CachedQuery&& rhs) noexcept; |
| 135 | bool WaitPending() const noexcept; | ||
| 136 | 73 | ||
| 137 | /// Returns the query type. | 74 | void Flush() override; |
| 138 | VideoCore::QueryType GetType() const noexcept; | ||
| 139 | |||
| 140 | /// Returns the guest CPU address for this query. | ||
| 141 | VAddr GetCpuAddr() const noexcept; | ||
| 142 | |||
| 143 | /// Returns the cache address for this query. | ||
| 144 | CacheAddr GetCacheAddr() const noexcept; | ||
| 145 | |||
| 146 | /// Returns the number of cached bytes. | ||
| 147 | u64 GetSizeInBytes() const noexcept; | ||
| 148 | 75 | ||
| 149 | private: | 76 | private: |
| 150 | VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). | 77 | QueryCache* cache; |
| 151 | VAddr cpu_addr; ///< Guest CPU address. | 78 | VideoCore::QueryType type; |
| 152 | u8* host_ptr; ///< Writable host pointer. | ||
| 153 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | ||
| 154 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 155 | }; | 79 | }; |
| 156 | 80 | ||
| 157 | } // namespace OpenGL | 81 | } // namespace OpenGL |