summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/query_cache.h323
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp287
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h122
4 files changed, 394 insertions, 339 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3208f4993..bb5895e99 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
37 memory_manager.h 37 memory_manager.h
38 morton.cpp 38 morton.cpp
39 morton.h 39 morton.h
40 query_cache.h
40 rasterizer_accelerated.cpp 41 rasterizer_accelerated.cpp
41 rasterizer_accelerated.h 42 rasterizer_accelerated.h
42 rasterizer_cache.cpp 43 rasterizer_cache.cpp
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..4c9151ce8
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,323 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <cstring>
10#include <iterator>
11#include <memory>
12#include <optional>
13#include <unordered_map>
14#include <vector>
15
16#include "common/assert.h"
17#include "core/core.h"
18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/gpu.h"
20#include "video_core/memory_manager.h"
21#include "video_core/rasterizer_interface.h"
22
23namespace VideoCommon {
24
25template <class QueryCache, class HostCounter>
26class CounterStreamBase {
27public:
28 explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
29 : cache{cache}, type{type} {}
30
31 /// Updates the state of the stream, enabling or disabling as needed.
32 void Update(bool enabled) {
33 if (enabled) {
34 Enable();
35 } else {
36 Disable();
37 }
38 }
39
40 /// Resets the stream to zero. It doesn't disable the query after resetting.
41 void Reset() {
42 if (current) {
43 current->EndQuery();
44
45 // Immediately start a new query to avoid disabling its state.
46 current = cache.Counter(nullptr, type);
47 }
48 last = nullptr;
49 }
50
51 /// Returns the current counter slicing as needed.
52 std::shared_ptr<HostCounter> Current() {
53 if (!current) {
54 return nullptr;
55 }
56 current->EndQuery();
57 last = std::move(current);
58 current = cache.Counter(last, type);
59 return last;
60 }
61
62 /// Returns true when the counter stream is enabled.
63 bool IsEnabled() const {
64 return static_cast<bool>(current);
65 }
66
67private:
68 /// Enables the stream.
69 void Enable() {
70 if (current) {
71 return;
72 }
73 current = cache.Counter(last, type);
74 }
75
76 // Disables the stream.
77 void Disable() {
78 if (current) {
79 current->EndQuery();
80 }
81 last = std::exchange(current, nullptr);
82 }
83
84 QueryCache& cache;
85 const VideoCore::QueryType type;
86
87 std::shared_ptr<HostCounter> current;
88 std::shared_ptr<HostCounter> last;
89};
90
91template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
92class QueryCacheBase {
93public:
94 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
95 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
96 static_cast<QueryCache&>(*this),
97 VideoCore::QueryType::SamplesPassed}}} {}
98
99 void InvalidateRegion(CacheAddr addr, std::size_t size) {
100 FlushAndRemoveRegion(addr, size);
101 }
102
103 void FlushRegion(CacheAddr addr, std::size_t size) {
104 FlushAndRemoveRegion(addr, size);
105 }
106
107 /**
108 * Records a query in GPU mapped memory, potentially marked with a timestamp.
109 * @param gpu_addr GPU address to flush to when the mapped memory is read.
110 * @param type Query type, e.g. SamplesPassed.
111 * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
112 */
113 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
114 auto& memory_manager = system.GPU().MemoryManager();
115 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
116
117 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
118 if (!query) {
119 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
120 ASSERT_OR_EXECUTE(cpu_addr, return;);
121
122 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
123 }
124
125 query->BindCounter(Stream(type).Current(), timestamp);
126 }
127
128 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
129 void UpdateCounters() {
130 const auto& regs = system.GPU().Maxwell3D().regs;
131 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
132 }
133
134 /// Resets a counter to zero. It doesn't disable the query after resetting.
135 void ResetCounter(VideoCore::QueryType type) {
136 Stream(type).Reset();
137 }
138
139 /// Returns a new host counter.
140 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
141 VideoCore::QueryType type) {
142 return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
143 type);
144 }
145
146 /// Returns the counter stream of the specified type.
147 CounterStream& Stream(VideoCore::QueryType type) {
148 return streams[static_cast<std::size_t>(type)];
149 }
150
151private:
152 /// Flushes a memory range to guest memory and removes it from the cache.
153 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
154 const u64 addr_begin = static_cast<u64>(addr);
155 const u64 addr_end = addr_begin + static_cast<u64>(size);
156 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
157 const u64 cache_begin = query.CacheAddr();
158 const u64 cache_end = cache_begin + query.SizeInBytes();
159 return cache_begin < addr_end && addr_begin < cache_end;
160 };
161
162 const u64 page_end = addr_end >> PAGE_SHIFT;
163 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
164 const auto& it = cached_queries.find(page);
165 if (it == std::end(cached_queries)) {
166 continue;
167 }
168 auto& contents = it->second;
169 for (auto& query : contents) {
170 if (!in_range(query)) {
171 continue;
172 }
173 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
174 query.Flush();
175 }
176 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
177 std::end(contents));
178 }
179 }
180
181 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
182 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
183 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
184 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
185 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
186 host_ptr);
187 }
188
189 /// Tries to a get a cached query. Returns nullptr on failure.
190 CachedQuery* TryGet(CacheAddr addr) {
191 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
192 const auto it = cached_queries.find(page);
193 if (it == std::end(cached_queries)) {
194 return nullptr;
195 }
196 auto& contents = it->second;
197 const auto found = std::find_if(std::begin(contents), std::end(contents),
198 [addr](auto& query) { return query.CacheAddr() == addr; });
199 return found != std::end(contents) ? &*found : nullptr;
200 }
201
202 static constexpr std::uintptr_t PAGE_SIZE = 4096;
203 static constexpr int PAGE_SHIFT = 12;
204
205 Core::System& system;
206 VideoCore::RasterizerInterface& rasterizer;
207
208 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
209
210 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
211};
212
213template <class QueryCache, class HostCounter>
214class HostCounterBase {
215public:
216 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
217 : dependency{std::move(dependency)} {}
218
219 /// Returns the current value of the query.
220 u64 Query() {
221 if (result) {
222 return *result;
223 }
224
225 u64 value = BlockingQuery();
226 if (dependency) {
227 value += dependency->Query();
228 }
229
230 return *(result = value);
231 }
232
233 /// Returns true when flushing this query will potentially wait.
234 bool WaitPending() const noexcept {
235 return result.has_value();
236 }
237
238protected:
239 /// Returns the value of query from the backend API blocking as needed.
240 virtual u64 BlockingQuery() const = 0;
241
242private:
243 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
244 std::optional<u64> result; ///< Filled with the already returned value.
245};
246
247template <class HostCounter>
248class CachedQueryBase {
249public:
250 explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
251 : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
252
253 CachedQueryBase(CachedQueryBase&& rhs) noexcept
254 : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
255 timestamp{rhs.timestamp} {}
256
257 CachedQueryBase(const CachedQueryBase&) = delete;
258
259 CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
260 cpu_addr = rhs.cpu_addr;
261 host_ptr = rhs.host_ptr;
262 counter = std::move(rhs.counter);
263 timestamp = rhs.timestamp;
264 return *this;
265 }
266
267 /// Flushes the query to guest memory.
268 virtual void Flush() {
269 // When counter is nullptr it means that it's just been reseted. We are supposed to write a
270 // zero in these cases.
271 const u64 value = counter ? counter->Query() : 0;
272 std::memcpy(host_ptr, &value, sizeof(u64));
273
274 if (timestamp) {
275 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
276 }
277 }
278
279 /// Binds a counter to this query.
280 void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
281 if (counter) {
282 // If there's an old counter set it means the query is being rewritten by the game.
283 // To avoid losing the data forever, flush here.
284 Flush();
285 }
286 counter = std::move(counter_);
287 timestamp = timestamp_;
288 }
289
290 VAddr CpuAddr() const noexcept {
291 return cpu_addr;
292 }
293
294 CacheAddr CacheAddr() const noexcept {
295 return ToCacheAddr(host_ptr);
296 }
297
298 u64 SizeInBytes() const noexcept {
299 return SizeInBytes(timestamp.has_value());
300 }
301
302 static u64 SizeInBytes(bool with_timestamp) {
303 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
304 }
305
306protected:
307 /// Returns true when querying the counter may potentially block.
308 bool WaitPending() const noexcept {
309 return counter && counter->WaitPending();
310 }
311
312private:
313 static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
314 static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
315 static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
316
317 VAddr cpu_addr; ///< Guest CPU address.
318 u8* host_ptr; ///< Writable host pointer.
319 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
320 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
321};
322
323} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 74cb73209..7d5a044c7 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -20,211 +20,49 @@
20 20
21namespace OpenGL { 21namespace OpenGL {
22 22
23using VideoCore::QueryType;
24
25namespace { 23namespace {
26 24
27constexpr std::uintptr_t PAGE_SIZE = 4096;
28constexpr int PAGE_SHIFT = 12;
29
30constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
31constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
32constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
33
34constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; 25constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
35 26
36constexpr GLenum GetTarget(QueryType type) { 27constexpr GLenum GetTarget(VideoCore::QueryType type) {
37 return QueryTargets[static_cast<std::size_t>(type)]; 28 return QueryTargets[static_cast<std::size_t>(type)];
38} 29}
39 30
40} // Anonymous namespace 31} // Anonymous namespace
41 32
42CounterStream::CounterStream(QueryCache& cache, QueryType type) 33QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
43 : cache{cache}, type{type}, target{GetTarget(type)} {} 34 : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
44 35 HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
45CounterStream::~CounterStream() = default; 36 gl_rasterizer)},
46 37 gl_rasterizer{gl_rasterizer} {}
47void CounterStream::Update(bool enabled, bool any_command_queued) {
48 if (enabled) {
49 Enable();
50 } else {
51 Disable(any_command_queued);
52 }
53}
54
55void CounterStream::Reset(bool any_command_queued) {
56 if (current) {
57 EndQuery(any_command_queued);
58
59 // Immediately start a new query to avoid disabling its state.
60 current = cache.GetHostCounter(nullptr, type);
61 }
62 last = nullptr;
63}
64
65std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
66 if (!current) {
67 return nullptr;
68 }
69 EndQuery(any_command_queued);
70 last = std::move(current);
71 current = cache.GetHostCounter(last, type);
72 return last;
73}
74
75void CounterStream::Enable() {
76 if (current) {
77 return;
78 }
79 current = cache.GetHostCounter(last, type);
80}
81
82void CounterStream::Disable(bool any_command_queued) {
83 if (current) {
84 EndQuery(any_command_queued);
85 }
86 last = std::exchange(current, nullptr);
87}
88
89void CounterStream::EndQuery(bool any_command_queued) {
90 if (!any_command_queued) {
91 // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
92 // having any of these causes a lock. glFlush is considered a command, so we can safely wait
93 // for this. Insert to the OpenGL command stream a flush.
94 glFlush();
95 }
96 glEndQuery(target);
97}
98
99QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
100 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
101 QueryType::SamplesPassed}}} {}
102 38
103QueryCache::~QueryCache() = default; 39QueryCache::~QueryCache() = default;
104 40
105void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { 41OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
106 const u64 addr_begin = static_cast<u64>(addr); 42 auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
107 const u64 addr_end = addr_begin + static_cast<u64>(size);
108 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
109 const u64 cache_begin = query.GetCacheAddr();
110 const u64 cache_end = cache_begin + query.GetSizeInBytes();
111 return cache_begin < addr_end && addr_begin < cache_end;
112 };
113
114 const u64 page_end = addr_end >> PAGE_SHIFT;
115 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
116 const auto& it = cached_queries.find(page);
117 if (it == std::end(cached_queries)) {
118 continue;
119 }
120 auto& contents = it->second;
121 for (auto& query : contents) {
122 if (!in_range(query)) {
123 continue;
124 }
125 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
126 Flush(query);
127 }
128 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
129 std::end(contents));
130 }
131}
132
133void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
134 // We can handle flushes in the same way as invalidations.
135 InvalidateRegion(addr, size);
136}
137
138void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
139 auto& memory_manager = system.GPU().MemoryManager();
140 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
141
142 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
143 if (!query) {
144 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
145 ASSERT_OR_EXECUTE(cpu_addr, return;);
146
147 query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
148 }
149
150 query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
151}
152
153void QueryCache::UpdateCounters() {
154 auto& samples_passed = GetStream(QueryType::SamplesPassed);
155
156 const auto& regs = system.GPU().Maxwell3D().regs;
157 samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
158}
159
160void QueryCache::ResetCounter(QueryType type) {
161 GetStream(type).Reset(rasterizer.AnyCommandQueued());
162}
163
164void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
165 reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
166}
167
168std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
169 QueryType type) {
170 auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
171 OGLQuery query; 43 OGLQuery query;
172 if (reserve.empty()) { 44 if (reserve.empty()) {
173 query.Create(GetTarget(type)); 45 query.Create(GetTarget(type));
174 } else { 46 return query;
175 query = std::move(reserve.back());
176 reserve.pop_back();
177 } 47 }
178 48
179 return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query)); 49 query = std::move(reserve.back());
50 reserve.pop_back();
51 return query;
180} 52}
181 53
182CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { 54void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
183 const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT; 55 queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
184 auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
185 rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
186 return stored_ref;
187}
188
189CachedQuery* QueryCache::TryGet(CacheAddr addr) {
190 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
191 const auto it = cached_queries.find(page);
192 if (it == std::end(cached_queries)) {
193 return nullptr;
194 }
195 auto& contents = it->second;
196 const auto found =
197 std::find_if(std::begin(contents), std::end(contents),
198 [addr](const auto& query) { return query.GetCacheAddr() == addr; });
199 return found != std::end(contents) ? &*found : nullptr;
200}
201
202void QueryCache::Flush(CachedQuery& cached_query) {
203 auto& stream = GetStream(cached_query.GetType());
204
205 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
206 // To avoid this disable and re-enable keeping the dependency stream.
207 // But we only have to do this if we have pending waits to be done.
208 const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
209 const bool any_command_queued = rasterizer.AnyCommandQueued();
210 if (slice_counter) {
211 stream.Update(false, any_command_queued);
212 }
213
214 cached_query.Flush();
215
216 if (slice_counter) {
217 stream.Update(true, any_command_queued);
218 }
219} 56}
220 57
221CounterStream& QueryCache::GetStream(QueryType type) { 58bool QueryCache::AnyCommandQueued() const noexcept {
222 return streams[static_cast<std::size_t>(type)]; 59 return gl_rasterizer.AnyCommandQueued();
223} 60}
224 61
225HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, 62HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
226 OGLQuery&& query_) 63 VideoCore::QueryType type)
227 : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { 64 : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
65 type{type}, query{cache.AllocateQuery(type)} {
228 glBeginQuery(GetTarget(type), query.handle); 66 glBeginQuery(GetTarget(type), query.handle);
229} 67}
230 68
@@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
232 cache.Reserve(type, std::move(query)); 70 cache.Reserve(type, std::move(query));
233} 71}
234 72
235u64 HostCounter::Query() { 73void HostCounter::EndQuery() {
236 if (result) { 74 if (!cache.AnyCommandQueued()) {
237 return *result; 75 // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
238 } 76 // having any of these causes a lock. glFlush is considered a command, so we can safely wait
239 77 // for this. Insert to the OpenGL command stream a flush.
240 u64 value; 78 glFlush();
241 glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
242 if (dependency) {
243 value += dependency->Query();
244 } 79 }
245 80 glEndQuery(GetTarget(type));
246 return *(result = value);
247} 81}
248 82
249bool HostCounter::WaitPending() const noexcept { 83u64 HostCounter::BlockingQuery() const {
250 return result.has_value(); 84 GLint64 value;
85 glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
86 return static_cast<u64>(value);
251} 87}
252 88
253CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) 89CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
254 : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} 90 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
255 91
256CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept 92CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
257 : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, 93 : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
258 counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
259
260CachedQuery::~CachedQuery() = default;
261 94
262CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { 95CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
96 VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
97 cache = rhs.cache;
263 type = rhs.type; 98 type = rhs.type;
264 cpu_addr = rhs.cpu_addr;
265 host_ptr = rhs.host_ptr;
266 counter = std::move(rhs.counter);
267 timestamp = rhs.timestamp;
268 return *this; 99 return *this;
269} 100}
270 101
271void CachedQuery::Flush() { 102void CachedQuery::Flush() {
272 // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero 103 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
273 // in these cases. 104 // To avoid this disable and re-enable keeping the dependency stream.
274 const u64 value = counter ? counter->Query() : 0; 105 // But we only have to do this if we have pending waits to be done.
275 std::memcpy(host_ptr, &value, sizeof(u64)); 106 auto& stream = cache->Stream(type);
276 107 const bool slice_counter = WaitPending() && stream.IsEnabled();
277 if (timestamp) { 108 if (slice_counter) {
278 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); 109 stream.Update(false);
279 }
280}
281
282void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
283 if (counter) {
284 // If there's an old counter set it means the query is being rewritten by the game.
285 // To avoid losing the data forever, flush here.
286 Flush();
287 } 110 }
288 counter = std::move(counter_);
289 timestamp = timestamp_;
290}
291
292bool CachedQuery::WaitPending() const noexcept {
293 return counter && counter->WaitPending();
294}
295 111
296QueryType CachedQuery::GetType() const noexcept { 112 VideoCommon::CachedQueryBase<HostCounter>::Flush();
297 return type;
298}
299 113
300VAddr CachedQuery::GetCpuAddr() const noexcept { 114 if (slice_counter) {
301 return cpu_addr; 115 stream.Update(true);
302} 116 }
303
304CacheAddr CachedQuery::GetCacheAddr() const noexcept {
305 return ToCacheAddr(host_ptr);
306}
307
308u64 CachedQuery::GetSizeInBytes() const noexcept {
309 return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
310} 117}
311 118
312} // namespace OpenGL 119} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d9f22b44d..20d337f15 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -13,6 +13,7 @@
13#include <glad/glad.h> 13#include <glad/glad.h>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/query_cache.h"
16#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_resource_manager.h"
18 19
@@ -24,134 +25,57 @@ namespace OpenGL {
24 25
25class CachedQuery; 26class CachedQuery;
26class HostCounter; 27class HostCounter;
27class RasterizerOpenGL;
28class QueryCache; 28class QueryCache;
29class RasterizerOpenGL;
29 30
30class CounterStream final { 31using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
31public:
32 explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
33 ~CounterStream();
34
35 void Update(bool enabled, bool any_command_queued);
36
37 void Reset(bool any_command_queued);
38
39 std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
40
41 bool IsEnabled() const {
42 return current != nullptr;
43 }
44
45private:
46 void Enable();
47
48 void Disable(bool any_command_queued);
49
50 void EndQuery(bool any_command_queued);
51 32
52 QueryCache& cache; 33class QueryCache final
53 34 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
54 std::shared_ptr<HostCounter> current;
55 std::shared_ptr<HostCounter> last;
56 VideoCore::QueryType type;
57 GLenum target;
58};
59
60class QueryCache final {
61public: 35public:
62 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); 36 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
63 ~QueryCache(); 37 ~QueryCache();
64 38
65 void InvalidateRegion(CacheAddr addr, std::size_t size); 39 OGLQuery AllocateQuery(VideoCore::QueryType type);
66
67 void FlushRegion(CacheAddr addr, std::size_t size);
68
69 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
70
71 void UpdateCounters();
72
73 void ResetCounter(VideoCore::QueryType type);
74 40
75 void Reserve(VideoCore::QueryType type, OGLQuery&& query); 41 void Reserve(VideoCore::QueryType type, OGLQuery&& query);
76 42
77 std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, 43 bool AnyCommandQueued() const noexcept;
78 VideoCore::QueryType type);
79 44
80private: 45private:
81 CachedQuery& Register(CachedQuery&& cached_query); 46 RasterizerOpenGL& gl_rasterizer;
82 47 std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
83 CachedQuery* TryGet(CacheAddr addr);
84
85 void Flush(CachedQuery& cached_query);
86
87 CounterStream& GetStream(VideoCore::QueryType type);
88
89 Core::System& system;
90 RasterizerOpenGL& rasterizer;
91
92 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
93
94 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
95 std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
96}; 48};
97 49
98class HostCounter final { 50class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
99public: 51public:
100 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, 52 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
101 VideoCore::QueryType type, OGLQuery&& query); 53 VideoCore::QueryType type);
102 ~HostCounter(); 54 ~HostCounter();
103 55
104 /// Returns the current value of the query. 56 void EndQuery();
105 u64 Query();
106
107 /// Returns true when querying this counter will potentially wait for OpenGL.
108 bool WaitPending() const noexcept;
109 57
110private: 58private:
59 u64 BlockingQuery() const override;
60
111 QueryCache& cache; 61 QueryCache& cache;
112 VideoCore::QueryType type; 62 VideoCore::QueryType type;
113 63 OGLQuery query;
114 std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
115 OGLQuery query; ///< OpenGL query.
116 std::optional<u64> result; ///< Added values of the counter.
117}; 64};
118 65
119class CachedQuery final { 66class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
120public: 67public:
121 explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); 68 explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
122 CachedQuery(CachedQuery&&) noexcept; 69 u8* host_ptr);
123 CachedQuery(const CachedQuery&) = delete; 70 CachedQuery(CachedQuery&& rhs) noexcept;
124 ~CachedQuery();
125
126 CachedQuery& operator=(CachedQuery&&) noexcept;
127
128 /// Writes the counter value to host memory.
129 void Flush();
130
131 /// Updates the counter this cached query registered in guest memory will write when requested.
132 void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
133 71
134 /// Returns true when a flushing this query will potentially wait for OpenGL. 72 CachedQuery& operator=(CachedQuery&& rhs) noexcept;
135 bool WaitPending() const noexcept;
136 73
137 /// Returns the query type. 74 void Flush() override;
138 VideoCore::QueryType GetType() const noexcept;
139
140 /// Returns the guest CPU address for this query.
141 VAddr GetCpuAddr() const noexcept;
142
143 /// Returns the cache address for this query.
144 CacheAddr GetCacheAddr() const noexcept;
145
146 /// Returns the number of cached bytes.
147 u64 GetSizeInBytes() const noexcept;
148 75
149private: 76private:
150 VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). 77 QueryCache* cache;
151 VAddr cpu_addr; ///< Guest CPU address. 78 VideoCore::QueryType type;
152 u8* host_ptr; ///< Writable host pointer.
153 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
154 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
155}; 79};
156 80
157} // namespace OpenGL 81} // namespace OpenGL