summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-02-11 16:02:41 -0300
committerGravatar ReinUsesLisp2020-02-14 17:38:27 -0300
commitc31382ced54c07650ae41fa2f75dc53da894784e (patch)
treeaeff230f51000fc357bbc2859c5ef66892f9a841 /src/video_core
parentgl_query_cache: Optimize query cache (diff)
downloadyuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.gz
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.tar.xz
yuzu-c31382ced54c07650ae41fa2f75dc53da894784e.zip
query_cache: Abstract OpenGL implementation
Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation.
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/query_cache.h323
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp287
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h122
4 files changed, 394 insertions, 339 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3208f4993..bb5895e99 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -37,6 +37,7 @@ add_library(video_core STATIC
37 memory_manager.h 37 memory_manager.h
38 morton.cpp 38 morton.cpp
39 morton.h 39 morton.h
40 query_cache.h
40 rasterizer_accelerated.cpp 41 rasterizer_accelerated.cpp
41 rasterizer_accelerated.h 42 rasterizer_accelerated.h
42 rasterizer_cache.cpp 43 rasterizer_cache.cpp
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
new file mode 100644
index 000000000..4c9151ce8
--- /dev/null
+++ b/src/video_core/query_cache.h
@@ -0,0 +1,323 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <cstring>
10#include <iterator>
11#include <memory>
12#include <optional>
13#include <unordered_map>
14#include <vector>
15
16#include "common/assert.h"
17#include "core/core.h"
18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/gpu.h"
20#include "video_core/memory_manager.h"
21#include "video_core/rasterizer_interface.h"
22
23namespace VideoCommon {
24
25template <class QueryCache, class HostCounter>
26class CounterStreamBase {
27public:
28 explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
29 : cache{cache}, type{type} {}
30
31 /// Updates the state of the stream, enabling or disabling as needed.
32 void Update(bool enabled) {
33 if (enabled) {
34 Enable();
35 } else {
36 Disable();
37 }
38 }
39
40 /// Resets the stream to zero. It doesn't disable the query after resetting.
41 void Reset() {
42 if (current) {
43 current->EndQuery();
44
45 // Immediately start a new query to avoid disabling its state.
46 current = cache.Counter(nullptr, type);
47 }
48 last = nullptr;
49 }
50
51 /// Returns the current counter slicing as needed.
52 std::shared_ptr<HostCounter> Current() {
53 if (!current) {
54 return nullptr;
55 }
56 current->EndQuery();
57 last = std::move(current);
58 current = cache.Counter(last, type);
59 return last;
60 }
61
62 /// Returns true when the counter stream is enabled.
63 bool IsEnabled() const {
64 return static_cast<bool>(current);
65 }
66
67private:
68 /// Enables the stream.
69 void Enable() {
70 if (current) {
71 return;
72 }
73 current = cache.Counter(last, type);
74 }
75
76 // Disables the stream.
77 void Disable() {
78 if (current) {
79 current->EndQuery();
80 }
81 last = std::exchange(current, nullptr);
82 }
83
84 QueryCache& cache;
85 const VideoCore::QueryType type;
86
87 std::shared_ptr<HostCounter> current;
88 std::shared_ptr<HostCounter> last;
89};
90
91template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
92class QueryCacheBase {
93public:
94 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
95 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
96 static_cast<QueryCache&>(*this),
97 VideoCore::QueryType::SamplesPassed}}} {}
98
99 void InvalidateRegion(CacheAddr addr, std::size_t size) {
100 FlushAndRemoveRegion(addr, size);
101 }
102
103 void FlushRegion(CacheAddr addr, std::size_t size) {
104 FlushAndRemoveRegion(addr, size);
105 }
106
107 /**
108 * Records a query in GPU mapped memory, potentially marked with a timestamp.
109 * @param gpu_addr GPU address to flush to when the mapped memory is read.
110 * @param type Query type, e.g. SamplesPassed.
111 * @param timestamp Timestamp, when empty the flushed query is assumed to be short.
112 */
113 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
114 auto& memory_manager = system.GPU().MemoryManager();
115 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
116
117 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
118 if (!query) {
119 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
120 ASSERT_OR_EXECUTE(cpu_addr, return;);
121
122 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
123 }
124
125 query->BindCounter(Stream(type).Current(), timestamp);
126 }
127
128 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
129 void UpdateCounters() {
130 const auto& regs = system.GPU().Maxwell3D().regs;
131 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
132 }
133
134 /// Resets a counter to zero. It doesn't disable the query after resetting.
135 void ResetCounter(VideoCore::QueryType type) {
136 Stream(type).Reset();
137 }
138
139 /// Returns a new host counter.
140 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
141 VideoCore::QueryType type) {
142 return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
143 type);
144 }
145
146 /// Returns the counter stream of the specified type.
147 CounterStream& Stream(VideoCore::QueryType type) {
148 return streams[static_cast<std::size_t>(type)];
149 }
150
151private:
152 /// Flushes a memory range to guest memory and removes it from the cache.
153 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
154 const u64 addr_begin = static_cast<u64>(addr);
155 const u64 addr_end = addr_begin + static_cast<u64>(size);
156 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
157 const u64 cache_begin = query.CacheAddr();
158 const u64 cache_end = cache_begin + query.SizeInBytes();
159 return cache_begin < addr_end && addr_begin < cache_end;
160 };
161
162 const u64 page_end = addr_end >> PAGE_SHIFT;
163 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
164 const auto& it = cached_queries.find(page);
165 if (it == std::end(cached_queries)) {
166 continue;
167 }
168 auto& contents = it->second;
169 for (auto& query : contents) {
170 if (!in_range(query)) {
171 continue;
172 }
173 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
174 query.Flush();
175 }
176 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
177 std::end(contents));
178 }
179 }
180
181 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
182 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
183 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
184 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
185 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
186 host_ptr);
187 }
188
189 /// Tries to a get a cached query. Returns nullptr on failure.
190 CachedQuery* TryGet(CacheAddr addr) {
191 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
192 const auto it = cached_queries.find(page);
193 if (it == std::end(cached_queries)) {
194 return nullptr;
195 }
196 auto& contents = it->second;
197 const auto found = std::find_if(std::begin(contents), std::end(contents),
198 [addr](auto& query) { return query.CacheAddr() == addr; });
199 return found != std::end(contents) ? &*found : nullptr;
200 }
201
202 static constexpr std::uintptr_t PAGE_SIZE = 4096;
203 static constexpr int PAGE_SHIFT = 12;
204
205 Core::System& system;
206 VideoCore::RasterizerInterface& rasterizer;
207
208 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
209
210 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
211};
212
213template <class QueryCache, class HostCounter>
214class HostCounterBase {
215public:
216 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
217 : dependency{std::move(dependency)} {}
218
219 /// Returns the current value of the query.
220 u64 Query() {
221 if (result) {
222 return *result;
223 }
224
225 u64 value = BlockingQuery();
226 if (dependency) {
227 value += dependency->Query();
228 }
229
230 return *(result = value);
231 }
232
233 /// Returns true when flushing this query will potentially wait.
234 bool WaitPending() const noexcept {
235 return result.has_value();
236 }
237
238protected:
239 /// Returns the value of query from the backend API blocking as needed.
240 virtual u64 BlockingQuery() const = 0;
241
242private:
243 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
244 std::optional<u64> result; ///< Filled with the already returned value.
245};
246
247template <class HostCounter>
248class CachedQueryBase {
249public:
250 explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
251 : cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
252
253 CachedQueryBase(CachedQueryBase&& rhs) noexcept
254 : cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
255 timestamp{rhs.timestamp} {}
256
257 CachedQueryBase(const CachedQueryBase&) = delete;
258
259 CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
260 cpu_addr = rhs.cpu_addr;
261 host_ptr = rhs.host_ptr;
262 counter = std::move(rhs.counter);
263 timestamp = rhs.timestamp;
264 return *this;
265 }
266
267 /// Flushes the query to guest memory.
268 virtual void Flush() {
269 // When counter is nullptr it means that it's just been reseted. We are supposed to write a
270 // zero in these cases.
271 const u64 value = counter ? counter->Query() : 0;
272 std::memcpy(host_ptr, &value, sizeof(u64));
273
274 if (timestamp) {
275 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
276 }
277 }
278
279 /// Binds a counter to this query.
280 void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
281 if (counter) {
282 // If there's an old counter set it means the query is being rewritten by the game.
283 // To avoid losing the data forever, flush here.
284 Flush();
285 }
286 counter = std::move(counter_);
287 timestamp = timestamp_;
288 }
289
290 VAddr CpuAddr() const noexcept {
291 return cpu_addr;
292 }
293
294 CacheAddr CacheAddr() const noexcept {
295 return ToCacheAddr(host_ptr);
296 }
297
298 u64 SizeInBytes() const noexcept {
299 return SizeInBytes(timestamp.has_value());
300 }
301
302 static u64 SizeInBytes(bool with_timestamp) {
303 return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
304 }
305
306protected:
307 /// Returns true when querying the counter may potentially block.
308 bool WaitPending() const noexcept {
309 return counter && counter->WaitPending();
310 }
311
312private:
313 static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
314 static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
315 static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
316
317 VAddr cpu_addr; ///< Guest CPU address.
318 u8* host_ptr; ///< Writable host pointer.
319 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
320 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
321};
322
323} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 74cb73209..7d5a044c7 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -20,211 +20,49 @@
20 20
21namespace OpenGL { 21namespace OpenGL {
22 22
23using VideoCore::QueryType;
24
25namespace { 23namespace {
26 24
27constexpr std::uintptr_t PAGE_SIZE = 4096;
28constexpr int PAGE_SHIFT = 12;
29
30constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
31constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
32constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
33
34constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; 25constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
35 26
36constexpr GLenum GetTarget(QueryType type) { 27constexpr GLenum GetTarget(VideoCore::QueryType type) {
37 return QueryTargets[static_cast<std::size_t>(type)]; 28 return QueryTargets[static_cast<std::size_t>(type)];
38} 29}
39 30
40} // Anonymous namespace 31} // Anonymous namespace
41 32
42CounterStream::CounterStream(QueryCache& cache, QueryType type) 33QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
43 : cache{cache}, type{type}, target{GetTarget(type)} {} 34 : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
44 35 HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
45CounterStream::~CounterStream() = default; 36 gl_rasterizer)},
46 37 gl_rasterizer{gl_rasterizer} {}
47void CounterStream::Update(bool enabled, bool any_command_queued) {
48 if (enabled) {
49 Enable();
50 } else {
51 Disable(any_command_queued);
52 }
53}
54
55void CounterStream::Reset(bool any_command_queued) {
56 if (current) {
57 EndQuery(any_command_queued);
58
59 // Immediately start a new query to avoid disabling its state.
60 current = cache.GetHostCounter(nullptr, type);
61 }
62 last = nullptr;
63}
64
65std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
66 if (!current) {
67 return nullptr;
68 }
69 EndQuery(any_command_queued);
70 last = std::move(current);
71 current = cache.GetHostCounter(last, type);
72 return last;
73}
74
75void CounterStream::Enable() {
76 if (current) {
77 return;
78 }
79 current = cache.GetHostCounter(last, type);
80}
81
82void CounterStream::Disable(bool any_command_queued) {
83 if (current) {
84 EndQuery(any_command_queued);
85 }
86 last = std::exchange(current, nullptr);
87}
88
89void CounterStream::EndQuery(bool any_command_queued) {
90 if (!any_command_queued) {
91 // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
92 // having any of these causes a lock. glFlush is considered a command, so we can safely wait
93 // for this. Insert to the OpenGL command stream a flush.
94 glFlush();
95 }
96 glEndQuery(target);
97}
98
99QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
100 : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
101 QueryType::SamplesPassed}}} {}
102 38
103QueryCache::~QueryCache() = default; 39QueryCache::~QueryCache() = default;
104 40
105void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { 41OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
106 const u64 addr_begin = static_cast<u64>(addr); 42 auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
107 const u64 addr_end = addr_begin + static_cast<u64>(size);
108 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
109 const u64 cache_begin = query.GetCacheAddr();
110 const u64 cache_end = cache_begin + query.GetSizeInBytes();
111 return cache_begin < addr_end && addr_begin < cache_end;
112 };
113
114 const u64 page_end = addr_end >> PAGE_SHIFT;
115 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
116 const auto& it = cached_queries.find(page);
117 if (it == std::end(cached_queries)) {
118 continue;
119 }
120 auto& contents = it->second;
121 for (auto& query : contents) {
122 if (!in_range(query)) {
123 continue;
124 }
125 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
126 Flush(query);
127 }
128 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
129 std::end(contents));
130 }
131}
132
133void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
134 // We can handle flushes in the same way as invalidations.
135 InvalidateRegion(addr, size);
136}
137
138void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
139 auto& memory_manager = system.GPU().MemoryManager();
140 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
141
142 CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
143 if (!query) {
144 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
145 ASSERT_OR_EXECUTE(cpu_addr, return;);
146
147 query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
148 }
149
150 query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
151}
152
153void QueryCache::UpdateCounters() {
154 auto& samples_passed = GetStream(QueryType::SamplesPassed);
155
156 const auto& regs = system.GPU().Maxwell3D().regs;
157 samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
158}
159
160void QueryCache::ResetCounter(QueryType type) {
161 GetStream(type).Reset(rasterizer.AnyCommandQueued());
162}
163
164void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
165 reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
166}
167
168std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
169 QueryType type) {
170 auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
171 OGLQuery query; 43 OGLQuery query;
172 if (reserve.empty()) { 44 if (reserve.empty()) {
173 query.Create(GetTarget(type)); 45 query.Create(GetTarget(type));
174 } else { 46 return query;
175 query = std::move(reserve.back());
176 reserve.pop_back();
177 } 47 }
178 48
179 return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query)); 49 query = std::move(reserve.back());
50 reserve.pop_back();
51 return query;
180} 52}
181 53
182CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { 54void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
183 const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT; 55 queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
184 auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
185 rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
186 return stored_ref;
187}
188
189CachedQuery* QueryCache::TryGet(CacheAddr addr) {
190 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
191 const auto it = cached_queries.find(page);
192 if (it == std::end(cached_queries)) {
193 return nullptr;
194 }
195 auto& contents = it->second;
196 const auto found =
197 std::find_if(std::begin(contents), std::end(contents),
198 [addr](const auto& query) { return query.GetCacheAddr() == addr; });
199 return found != std::end(contents) ? &*found : nullptr;
200}
201
202void QueryCache::Flush(CachedQuery& cached_query) {
203 auto& stream = GetStream(cached_query.GetType());
204
205 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
206 // To avoid this disable and re-enable keeping the dependency stream.
207 // But we only have to do this if we have pending waits to be done.
208 const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
209 const bool any_command_queued = rasterizer.AnyCommandQueued();
210 if (slice_counter) {
211 stream.Update(false, any_command_queued);
212 }
213
214 cached_query.Flush();
215
216 if (slice_counter) {
217 stream.Update(true, any_command_queued);
218 }
219} 56}
220 57
221CounterStream& QueryCache::GetStream(QueryType type) { 58bool QueryCache::AnyCommandQueued() const noexcept {
222 return streams[static_cast<std::size_t>(type)]; 59 return gl_rasterizer.AnyCommandQueued();
223} 60}
224 61
225HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, 62HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
226 OGLQuery&& query_) 63 VideoCore::QueryType type)
227 : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { 64 : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
65 type{type}, query{cache.AllocateQuery(type)} {
228 glBeginQuery(GetTarget(type), query.handle); 66 glBeginQuery(GetTarget(type), query.handle);
229} 67}
230 68
@@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
232 cache.Reserve(type, std::move(query)); 70 cache.Reserve(type, std::move(query));
233} 71}
234 72
235u64 HostCounter::Query() { 73void HostCounter::EndQuery() {
236 if (result) { 74 if (!cache.AnyCommandQueued()) {
237 return *result; 75 // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
238 } 76 // having any of these causes a lock. glFlush is considered a command, so we can safely wait
239 77 // for this. Insert to the OpenGL command stream a flush.
240 u64 value; 78 glFlush();
241 glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
242 if (dependency) {
243 value += dependency->Query();
244 } 79 }
245 80 glEndQuery(GetTarget(type));
246 return *(result = value);
247} 81}
248 82
249bool HostCounter::WaitPending() const noexcept { 83u64 HostCounter::BlockingQuery() const {
250 return result.has_value(); 84 GLint64 value;
85 glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
86 return static_cast<u64>(value);
251} 87}
252 88
253CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) 89CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
254 : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} 90 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
255 91
256CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept 92CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
257 : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, 93 : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
258 counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
259
260CachedQuery::~CachedQuery() = default;
261 94
262CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { 95CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
96 VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
97 cache = rhs.cache;
263 type = rhs.type; 98 type = rhs.type;
264 cpu_addr = rhs.cpu_addr;
265 host_ptr = rhs.host_ptr;
266 counter = std::move(rhs.counter);
267 timestamp = rhs.timestamp;
268 return *this; 99 return *this;
269} 100}
270 101
271void CachedQuery::Flush() { 102void CachedQuery::Flush() {
272 // When counter is nullptr it means that it's just been reseted. We are supposed to write a zero 103 // Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
273 // in these cases. 104 // To avoid this disable and re-enable keeping the dependency stream.
274 const u64 value = counter ? counter->Query() : 0; 105 // But we only have to do this if we have pending waits to be done.
275 std::memcpy(host_ptr, &value, sizeof(u64)); 106 auto& stream = cache->Stream(type);
276 107 const bool slice_counter = WaitPending() && stream.IsEnabled();
277 if (timestamp) { 108 if (slice_counter) {
278 std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); 109 stream.Update(false);
279 }
280}
281
282void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
283 if (counter) {
284 // If there's an old counter set it means the query is being rewritten by the game.
285 // To avoid losing the data forever, flush here.
286 Flush();
287 } 110 }
288 counter = std::move(counter_);
289 timestamp = timestamp_;
290}
291
292bool CachedQuery::WaitPending() const noexcept {
293 return counter && counter->WaitPending();
294}
295 111
296QueryType CachedQuery::GetType() const noexcept { 112 VideoCommon::CachedQueryBase<HostCounter>::Flush();
297 return type;
298}
299 113
300VAddr CachedQuery::GetCpuAddr() const noexcept { 114 if (slice_counter) {
301 return cpu_addr; 115 stream.Update(true);
302} 116 }
303
304CacheAddr CachedQuery::GetCacheAddr() const noexcept {
305 return ToCacheAddr(host_ptr);
306}
307
308u64 CachedQuery::GetSizeInBytes() const noexcept {
309 return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
310} 117}
311 118
312} // namespace OpenGL 119} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d9f22b44d..20d337f15 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -13,6 +13,7 @@
13#include <glad/glad.h> 13#include <glad/glad.h>
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/query_cache.h"
16#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_resource_manager.h"
18 19
@@ -24,134 +25,57 @@ namespace OpenGL {
24 25
25class CachedQuery; 26class CachedQuery;
26class HostCounter; 27class HostCounter;
27class RasterizerOpenGL;
28class QueryCache; 28class QueryCache;
29class RasterizerOpenGL;
29 30
30class CounterStream final { 31using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
31public:
32 explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
33 ~CounterStream();
34
35 void Update(bool enabled, bool any_command_queued);
36
37 void Reset(bool any_command_queued);
38
39 std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
40
41 bool IsEnabled() const {
42 return current != nullptr;
43 }
44
45private:
46 void Enable();
47
48 void Disable(bool any_command_queued);
49
50 void EndQuery(bool any_command_queued);
51 32
52 QueryCache& cache; 33class QueryCache final
53 34 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
54 std::shared_ptr<HostCounter> current;
55 std::shared_ptr<HostCounter> last;
56 VideoCore::QueryType type;
57 GLenum target;
58};
59
60class QueryCache final {
61public: 35public:
62 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); 36 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
63 ~QueryCache(); 37 ~QueryCache();
64 38
65 void InvalidateRegion(CacheAddr addr, std::size_t size); 39 OGLQuery AllocateQuery(VideoCore::QueryType type);
66
67 void FlushRegion(CacheAddr addr, std::size_t size);
68
69 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
70
71 void UpdateCounters();
72
73 void ResetCounter(VideoCore::QueryType type);
74 40
75 void Reserve(VideoCore::QueryType type, OGLQuery&& query); 41 void Reserve(VideoCore::QueryType type, OGLQuery&& query);
76 42
77 std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, 43 bool AnyCommandQueued() const noexcept;
78 VideoCore::QueryType type);
79 44
80private: 45private:
81 CachedQuery& Register(CachedQuery&& cached_query); 46 RasterizerOpenGL& gl_rasterizer;
82 47 std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
83 CachedQuery* TryGet(CacheAddr addr);
84
85 void Flush(CachedQuery& cached_query);
86
87 CounterStream& GetStream(VideoCore::QueryType type);
88
89 Core::System& system;
90 RasterizerOpenGL& rasterizer;
91
92 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
93
94 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
95 std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
96}; 48};
97 49
98class HostCounter final { 50class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
99public: 51public:
100 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, 52 explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
101 VideoCore::QueryType type, OGLQuery&& query); 53 VideoCore::QueryType type);
102 ~HostCounter(); 54 ~HostCounter();
103 55
104 /// Returns the current value of the query. 56 void EndQuery();
105 u64 Query();
106
107 /// Returns true when querying this counter will potentially wait for OpenGL.
108 bool WaitPending() const noexcept;
109 57
110private: 58private:
59 u64 BlockingQuery() const override;
60
111 QueryCache& cache; 61 QueryCache& cache;
112 VideoCore::QueryType type; 62 VideoCore::QueryType type;
113 63 OGLQuery query;
114 std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
115 OGLQuery query; ///< OpenGL query.
116 std::optional<u64> result; ///< Added values of the counter.
117}; 64};
118 65
119class CachedQuery final { 66class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
120public: 67public:
121 explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); 68 explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
122 CachedQuery(CachedQuery&&) noexcept; 69 u8* host_ptr);
123 CachedQuery(const CachedQuery&) = delete; 70 CachedQuery(CachedQuery&& rhs) noexcept;
124 ~CachedQuery();
125
126 CachedQuery& operator=(CachedQuery&&) noexcept;
127
128 /// Writes the counter value to host memory.
129 void Flush();
130
131 /// Updates the counter this cached query registered in guest memory will write when requested.
132 void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
133 71
134 /// Returns true when a flushing this query will potentially wait for OpenGL. 72 CachedQuery& operator=(CachedQuery&& rhs) noexcept;
135 bool WaitPending() const noexcept;
136 73
137 /// Returns the query type. 74 void Flush() override;
138 VideoCore::QueryType GetType() const noexcept;
139
140 /// Returns the guest CPU address for this query.
141 VAddr GetCpuAddr() const noexcept;
142
143 /// Returns the cache address for this query.
144 CacheAddr GetCacheAddr() const noexcept;
145
146 /// Returns the number of cached bytes.
147 u64 GetSizeInBytes() const noexcept;
148 75
149private: 76private:
150 VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). 77 QueryCache* cache;
151 VAddr cpu_addr; ///< Guest CPU address. 78 VideoCore::QueryType type;
152 u8* host_ptr; ///< Writable host pointer.
153 std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
154 std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
155}; 79};
156 80
157} // namespace OpenGL 81} // namespace OpenGL