diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.cpp | 214 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_query_cache.h | 61 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 |
6 files changed, 217 insertions, 79 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 9add2bc94..842cdcbcf 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <cinttypes> | 5 | #include <cinttypes> |
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <optional> | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "core/core.h" | 9 | #include "core/core.h" |
| 9 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| @@ -16,6 +17,8 @@ | |||
| 16 | 17 | ||
| 17 | namespace Tegra::Engines { | 18 | namespace Tegra::Engines { |
| 18 | 19 | ||
| 20 | using VideoCore::QueryType; | ||
| 21 | |||
| 19 | /// First register id that is actually a Macro call. | 22 | /// First register id that is actually a Macro call. |
| 20 | constexpr u32 MacroRegistersStart = 0xE00; | 23 | constexpr u32 MacroRegistersStart = 0xE00; |
| 21 | 24 | ||
| @@ -614,10 +617,11 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 614 | void Maxwell3D::ProcessCounterReset() { | 617 | void Maxwell3D::ProcessCounterReset() { |
| 615 | switch (regs.counter_reset) { | 618 | switch (regs.counter_reset) { |
| 616 | case Regs::CounterReset::SampleCnt: | 619 | case Regs::CounterReset::SampleCnt: |
| 617 | rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed); | 620 | rasterizer.ResetCounter(QueryType::SamplesPassed); |
| 618 | break; | 621 | break; |
| 619 | default: | 622 | default: |
| 620 | UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset)); | 623 | LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", |
| 624 | static_cast<int>(regs.counter_reset)); | ||
| 621 | break; | 625 | break; |
| 622 | } | 626 | } |
| 623 | } | 627 | } |
| @@ -670,7 +674,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 670 | return 0; | 674 | return 0; |
| 671 | case Regs::QuerySelect::SamplesPassed: | 675 | case Regs::QuerySelect::SamplesPassed: |
| 672 | // Deferred. | 676 | // Deferred. |
| 673 | rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed); | 677 | rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, |
| 678 | system.GPU().GetTicks()); | ||
| 674 | return {}; | 679 | return {}; |
| 675 | default: | 680 | default: |
| 676 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 681 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a394f2d3e..e9f1436f0 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <optional> | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 11 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 12 | #include "video_core/gpu.h" |
| @@ -50,7 +51,7 @@ public: | |||
| 50 | virtual void ResetCounter(QueryType type) = 0; | 51 | virtual void ResetCounter(QueryType type) = 0; |
| 51 | 52 | ||
| 52 | /// Records a GPU query and caches it | 53 | /// Records a GPU query and caches it |
| 53 | virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0; | 54 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 54 | 55 | ||
| 55 | /// Notify rasterizer that all caches should be flushed to Switch memory | 56 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 56 | virtual void FlushAll() = 0; | 57 | virtual void FlushAll() = 0; |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index 8f0e8241d..74cb73209 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp | |||
| @@ -2,8 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <cstring> | 6 | #include <cstring> |
| 6 | #include <memory> | 7 | #include <memory> |
| 8 | #include <unordered_map> | ||
| 7 | #include <utility> | 9 | #include <utility> |
| 8 | #include <vector> | 10 | #include <vector> |
| 9 | 11 | ||
| @@ -22,6 +24,13 @@ using VideoCore::QueryType; | |||
| 22 | 24 | ||
| 23 | namespace { | 25 | namespace { |
| 24 | 26 | ||
| 27 | constexpr std::uintptr_t PAGE_SIZE = 4096; | ||
| 28 | constexpr int PAGE_SHIFT = 12; | ||
| 29 | |||
| 30 | constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp | ||
| 31 | constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp | ||
| 32 | constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8; | ||
| 33 | |||
| 25 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; | 34 | constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; |
| 26 | 35 | ||
| 27 | constexpr GLenum GetTarget(QueryType type) { | 36 | constexpr GLenum GetTarget(QueryType type) { |
| @@ -37,23 +46,19 @@ CounterStream::~CounterStream() = default; | |||
| 37 | 46 | ||
| 38 | void CounterStream::Update(bool enabled, bool any_command_queued) { | 47 | void CounterStream::Update(bool enabled, bool any_command_queued) { |
| 39 | if (enabled) { | 48 | if (enabled) { |
| 40 | if (!current) { | 49 | Enable(); |
| 41 | current = cache.GetHostCounter(last, type); | 50 | } else { |
| 42 | } | 51 | Disable(any_command_queued); |
| 43 | return; | ||
| 44 | } | ||
| 45 | |||
| 46 | if (current) { | ||
| 47 | EndQuery(any_command_queued); | ||
| 48 | } | 52 | } |
| 49 | last = std::exchange(current, nullptr); | ||
| 50 | } | 53 | } |
| 51 | 54 | ||
| 52 | void CounterStream::Reset(bool any_command_queued) { | 55 | void CounterStream::Reset(bool any_command_queued) { |
| 53 | if (current) { | 56 | if (current) { |
| 54 | EndQuery(any_command_queued); | 57 | EndQuery(any_command_queued); |
| 58 | |||
| 59 | // Immediately start a new query to avoid disabling its state. | ||
| 60 | current = cache.GetHostCounter(nullptr, type); | ||
| 55 | } | 61 | } |
| 56 | current = nullptr; | ||
| 57 | last = nullptr; | 62 | last = nullptr; |
| 58 | } | 63 | } |
| 59 | 64 | ||
| @@ -67,6 +72,20 @@ std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) | |||
| 67 | return last; | 72 | return last; |
| 68 | } | 73 | } |
| 69 | 74 | ||
| 75 | void CounterStream::Enable() { | ||
| 76 | if (current) { | ||
| 77 | return; | ||
| 78 | } | ||
| 79 | current = cache.GetHostCounter(last, type); | ||
| 80 | } | ||
| 81 | |||
| 82 | void CounterStream::Disable(bool any_command_queued) { | ||
| 83 | if (current) { | ||
| 84 | EndQuery(any_command_queued); | ||
| 85 | } | ||
| 86 | last = std::exchange(current, nullptr); | ||
| 87 | } | ||
| 88 | |||
| 70 | void CounterStream::EndQuery(bool any_command_queued) { | 89 | void CounterStream::EndQuery(bool any_command_queued) { |
| 71 | if (!any_command_queued) { | 90 | if (!any_command_queued) { |
| 72 | // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not | 91 | // There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not |
| @@ -78,26 +97,57 @@ void CounterStream::EndQuery(bool any_command_queued) { | |||
| 78 | } | 97 | } |
| 79 | 98 | ||
| 80 | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) | 99 | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer) |
| 81 | : RasterizerCache{rasterizer}, system{system}, | 100 | : system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this, |
| 82 | rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {} | 101 | QueryType::SamplesPassed}}} {} |
| 83 | 102 | ||
| 84 | QueryCache::~QueryCache() = default; | 103 | QueryCache::~QueryCache() = default; |
| 85 | 104 | ||
| 86 | void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) { | 105 | void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) { |
| 106 | const u64 addr_begin = static_cast<u64>(addr); | ||
| 107 | const u64 addr_end = addr_begin + static_cast<u64>(size); | ||
| 108 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | ||
| 109 | const u64 cache_begin = query.GetCacheAddr(); | ||
| 110 | const u64 cache_end = cache_begin + query.GetSizeInBytes(); | ||
| 111 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 112 | }; | ||
| 113 | |||
| 114 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 115 | for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 116 | const auto& it = cached_queries.find(page); | ||
| 117 | if (it == std::end(cached_queries)) { | ||
| 118 | continue; | ||
| 119 | } | ||
| 120 | auto& contents = it->second; | ||
| 121 | for (auto& query : contents) { | ||
| 122 | if (!in_range(query)) { | ||
| 123 | continue; | ||
| 124 | } | ||
| 125 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1); | ||
| 126 | Flush(query); | ||
| 127 | } | ||
| 128 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | ||
| 129 | std::end(contents)); | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 134 | // We can handle flushes in the same way as invalidations. | ||
| 135 | InvalidateRegion(addr, size); | ||
| 136 | } | ||
| 137 | |||
| 138 | void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) { | ||
| 87 | auto& memory_manager = system.GPU().MemoryManager(); | 139 | auto& memory_manager = system.GPU().MemoryManager(); |
| 88 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 140 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
| 89 | 141 | ||
| 90 | auto query = TryGet(host_ptr); | 142 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); |
| 91 | if (!query) { | 143 | if (!query) { |
| 92 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 144 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 93 | ASSERT_OR_EXECUTE(cpu_addr, return;); | 145 | ASSERT_OR_EXECUTE(cpu_addr, return;); |
| 94 | 146 | ||
| 95 | query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr); | 147 | query = &Register(CachedQuery(type, *cpu_addr, host_ptr)); |
| 96 | Register(query); | ||
| 97 | } | 148 | } |
| 98 | 149 | ||
| 99 | query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued())); | 150 | query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp); |
| 100 | query->MarkAsModified(true, *this); | ||
| 101 | } | 151 | } |
| 102 | 152 | ||
| 103 | void QueryCache::UpdateCounters() { | 153 | void QueryCache::UpdateCounters() { |
| @@ -117,34 +167,54 @@ void QueryCache::Reserve(QueryType type, OGLQuery&& query) { | |||
| 117 | 167 | ||
| 118 | std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency, | 168 | std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency, |
| 119 | QueryType type) { | 169 | QueryType type) { |
| 120 | const auto type_index = static_cast<std::size_t>(type); | 170 | auto& reserve = reserved_queries[static_cast<std::size_t>(type)]; |
| 121 | auto& reserve = reserved_queries[type_index]; | 171 | OGLQuery query; |
| 122 | |||
| 123 | if (reserve.empty()) { | 172 | if (reserve.empty()) { |
| 124 | return std::make_shared<HostCounter>(*this, std::move(dependency), type); | 173 | query.Create(GetTarget(type)); |
| 174 | } else { | ||
| 175 | query = std::move(reserve.back()); | ||
| 176 | reserve.pop_back(); | ||
| 125 | } | 177 | } |
| 126 | 178 | ||
| 127 | auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type, | 179 | return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query)); |
| 128 | std::move(reserve.back())); | 180 | } |
| 129 | reserve.pop_back(); | 181 | |
| 130 | return counter; | 182 | CachedQuery& QueryCache::Register(CachedQuery&& cached_query) { |
| 183 | const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT; | ||
| 184 | auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query)); | ||
| 185 | rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1); | ||
| 186 | return stored_ref; | ||
| 187 | } | ||
| 188 | |||
| 189 | CachedQuery* QueryCache::TryGet(CacheAddr addr) { | ||
| 190 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | ||
| 191 | const auto it = cached_queries.find(page); | ||
| 192 | if (it == std::end(cached_queries)) { | ||
| 193 | return nullptr; | ||
| 194 | } | ||
| 195 | auto& contents = it->second; | ||
| 196 | const auto found = | ||
| 197 | std::find_if(std::begin(contents), std::end(contents), | ||
| 198 | [addr](const auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 199 | return found != std::end(contents) ? &*found : nullptr; | ||
| 131 | } | 200 | } |
| 132 | 201 | ||
| 133 | void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) { | 202 | void QueryCache::Flush(CachedQuery& cached_query) { |
| 134 | auto& counter = *counter_; | 203 | auto& stream = GetStream(cached_query.GetType()); |
| 135 | auto& stream = GetStream(counter.GetType()); | ||
| 136 | 204 | ||
| 137 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. | 205 | // Waiting for a query while another query of the same target is enabled locks Nvidia's driver. |
| 138 | // To avoid this disable and re-enable keeping the dependency stream. | 206 | // To avoid this disable and re-enable keeping the dependency stream. |
| 139 | const bool is_enabled = stream.IsEnabled(); | 207 | // But we only have to do this if we have pending waits to be done. |
| 140 | if (is_enabled) { | 208 | const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending(); |
| 141 | stream.Update(false, false); | 209 | const bool any_command_queued = rasterizer.AnyCommandQueued(); |
| 210 | if (slice_counter) { | ||
| 211 | stream.Update(false, any_command_queued); | ||
| 142 | } | 212 | } |
| 143 | 213 | ||
| 144 | counter.Flush(); | 214 | cached_query.Flush(); |
| 145 | 215 | ||
| 146 | if (is_enabled) { | 216 | if (slice_counter) { |
| 147 | stream.Update(true, false); | 217 | stream.Update(true, any_command_queued); |
| 148 | } | 218 | } |
| 149 | } | 219 | } |
| 150 | 220 | ||
| @@ -152,13 +222,6 @@ CounterStream& QueryCache::GetStream(QueryType type) { | |||
| 152 | return streams[static_cast<std::size_t>(type)]; | 222 | return streams[static_cast<std::size_t>(type)]; |
| 153 | } | 223 | } |
| 154 | 224 | ||
| 155 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type) | ||
| 156 | : cache{cache}, type{type}, dependency{std::move(dependency)} { | ||
| 157 | const GLenum target = GetTarget(type); | ||
| 158 | query.Create(target); | ||
| 159 | glBeginQuery(target, query.handle); | ||
| 160 | } | ||
| 161 | |||
| 162 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, | 225 | HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type, |
| 163 | OGLQuery&& query_) | 226 | OGLQuery&& query_) |
| 164 | : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { | 227 | : cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} { |
| @@ -170,35 +233,80 @@ HostCounter::~HostCounter() { | |||
| 170 | } | 233 | } |
| 171 | 234 | ||
| 172 | u64 HostCounter::Query() { | 235 | u64 HostCounter::Query() { |
| 173 | if (query.handle == 0) { | 236 | if (result) { |
| 174 | return result; | 237 | return *result; |
| 175 | } | 238 | } |
| 176 | 239 | ||
| 177 | glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result); | 240 | u64 value; |
| 178 | 241 | glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value); | |
| 179 | if (dependency) { | 242 | if (dependency) { |
| 180 | result += dependency->Query(); | 243 | value += dependency->Query(); |
| 181 | } | 244 | } |
| 182 | 245 | ||
| 183 | return result; | 246 | return *(result = value); |
| 247 | } | ||
| 248 | |||
| 249 | bool HostCounter::WaitPending() const noexcept { | ||
| 250 | return result.has_value(); | ||
| 184 | } | 251 | } |
| 185 | 252 | ||
| 186 | CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) | 253 | CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr) |
| 187 | : RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} | 254 | : type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {} |
| 255 | |||
| 256 | CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept | ||
| 257 | : type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, | ||
| 258 | counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {} | ||
| 188 | 259 | ||
| 189 | CachedQuery::~CachedQuery() = default; | 260 | CachedQuery::~CachedQuery() = default; |
| 190 | 261 | ||
| 262 | CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { | ||
| 263 | type = rhs.type; | ||
| 264 | cpu_addr = rhs.cpu_addr; | ||
| 265 | host_ptr = rhs.host_ptr; | ||
| 266 | counter = std::move(rhs.counter); | ||
| 267 | timestamp = rhs.timestamp; | ||
| 268 | return *this; | ||
| 269 | } | ||
| 270 | |||
| 191 | void CachedQuery::Flush() { | 271 | void CachedQuery::Flush() { |
| 192 | const u64 value = counter->Query(); | 272 | // When counter is nullptr it means that it's just been reset. We are supposed to write a zero |
| 193 | std::memcpy(host_ptr, &value, sizeof(value)); | 273 | // in these cases. |
| 274 | const u64 value = counter ? counter->Query() : 0; | ||
| 275 | std::memcpy(host_ptr, &value, sizeof(u64)); | ||
| 276 | |||
| 277 | if (timestamp) { | ||
| 278 | std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64)); | ||
| 279 | } | ||
| 194 | } | 280 | } |
| 195 | 281 | ||
| 196 | void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) { | 282 | void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) { |
| 283 | if (counter) { | ||
| 284 | // If there's an old counter set it means the query is being rewritten by the game. | ||
| 285 | // To avoid losing the data forever, flush here. | ||
| 286 | Flush(); | ||
| 287 | } | ||
| 197 | counter = std::move(counter_); | 288 | counter = std::move(counter_); |
| 289 | timestamp = timestamp_; | ||
| 290 | } | ||
| 291 | |||
| 292 | bool CachedQuery::WaitPending() const noexcept { | ||
| 293 | return counter && counter->WaitPending(); | ||
| 198 | } | 294 | } |
| 199 | 295 | ||
| 200 | QueryType CachedQuery::GetType() const { | 296 | QueryType CachedQuery::GetType() const noexcept { |
| 201 | return type; | 297 | return type; |
| 202 | } | 298 | } |
| 203 | 299 | ||
| 300 | VAddr CachedQuery::GetCpuAddr() const noexcept { | ||
| 301 | return cpu_addr; | ||
| 302 | } | ||
| 303 | |||
| 304 | CacheAddr CachedQuery::GetCacheAddr() const noexcept { | ||
| 305 | return ToCacheAddr(host_ptr); | ||
| 306 | } | ||
| 307 | |||
| 308 | u64 CachedQuery::GetSizeInBytes() const noexcept { | ||
| 309 | return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE; | ||
| 310 | } | ||
| 311 | |||
| 204 | } // namespace OpenGL | 312 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index 91594b120..d9f22b44d 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h | |||
| @@ -7,12 +7,12 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <unordered_map> | ||
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | 12 | ||
| 12 | #include <glad/glad.h> | 13 | #include <glad/glad.h> |
| 13 | 14 | ||
| 14 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | 18 | ||
| @@ -43,6 +43,10 @@ public: | |||
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | private: | 45 | private: |
| 46 | void Enable(); | ||
| 47 | |||
| 48 | void Disable(bool any_command_queued); | ||
| 49 | |||
| 46 | void EndQuery(bool any_command_queued); | 50 | void EndQuery(bool any_command_queued); |
| 47 | 51 | ||
| 48 | QueryCache& cache; | 52 | QueryCache& cache; |
| @@ -53,12 +57,16 @@ private: | |||
| 53 | GLenum target; | 57 | GLenum target; |
| 54 | }; | 58 | }; |
| 55 | 59 | ||
| 56 | class QueryCache final : public RasterizerCache<std::shared_ptr<CachedQuery>> { | 60 | class QueryCache final { |
| 57 | public: | 61 | public: |
| 58 | explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); | 62 | explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); |
| 59 | ~QueryCache(); | 63 | ~QueryCache(); |
| 60 | 64 | ||
| 61 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type); | 65 | void InvalidateRegion(CacheAddr addr, std::size_t size); |
| 66 | |||
| 67 | void FlushRegion(CacheAddr addr, std::size_t size); | ||
| 68 | |||
| 69 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp); | ||
| 62 | 70 | ||
| 63 | void UpdateCounters(); | 71 | void UpdateCounters(); |
| 64 | 72 | ||
| @@ -69,15 +77,20 @@ public: | |||
| 69 | std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, | 77 | std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency, |
| 70 | VideoCore::QueryType type); | 78 | VideoCore::QueryType type); |
| 71 | 79 | ||
| 72 | protected: | ||
| 73 | void FlushObjectInner(const std::shared_ptr<CachedQuery>& counter) override; | ||
| 74 | |||
| 75 | private: | 80 | private: |
| 81 | CachedQuery& Register(CachedQuery&& cached_query); | ||
| 82 | |||
| 83 | CachedQuery* TryGet(CacheAddr addr); | ||
| 84 | |||
| 85 | void Flush(CachedQuery& cached_query); | ||
| 86 | |||
| 76 | CounterStream& GetStream(VideoCore::QueryType type); | 87 | CounterStream& GetStream(VideoCore::QueryType type); |
| 77 | 88 | ||
| 78 | Core::System& system; | 89 | Core::System& system; |
| 79 | RasterizerOpenGL& rasterizer; | 90 | RasterizerOpenGL& rasterizer; |
| 80 | 91 | ||
| 92 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | ||
| 93 | |||
| 81 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 94 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 82 | std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries; | 95 | std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries; |
| 83 | }; | 96 | }; |
| @@ -85,50 +98,60 @@ private: | |||
| 85 | class HostCounter final { | 98 | class HostCounter final { |
| 86 | public: | 99 | public: |
| 87 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, | 100 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, |
| 88 | VideoCore::QueryType type); | ||
| 89 | explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||
| 90 | VideoCore::QueryType type, OGLQuery&& query); | 101 | VideoCore::QueryType type, OGLQuery&& query); |
| 91 | ~HostCounter(); | 102 | ~HostCounter(); |
| 92 | 103 | ||
| 93 | /// Returns the current value of the query. | 104 | /// Returns the current value of the query. |
| 94 | u64 Query(); | 105 | u64 Query(); |
| 95 | 106 | ||
| 107 | /// Returns true when querying this counter will potentially wait for OpenGL. | ||
| 108 | bool WaitPending() const noexcept; | ||
| 109 | |||
| 96 | private: | 110 | private: |
| 97 | QueryCache& cache; | 111 | QueryCache& cache; |
| 98 | VideoCore::QueryType type; | 112 | VideoCore::QueryType type; |
| 99 | 113 | ||
| 100 | std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one. | 114 | std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one. |
| 101 | OGLQuery query; ///< OpenGL query. | 115 | OGLQuery query; ///< OpenGL query. |
| 102 | u64 result; ///< Added values of the counter. | 116 | std::optional<u64> result; ///< Added values of the counter. |
| 103 | }; | 117 | }; |
| 104 | 118 | ||
| 105 | class CachedQuery final : public RasterizerCacheObject { | 119 | class CachedQuery final { |
| 106 | public: | 120 | public: |
| 107 | explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); | 121 | explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); |
| 122 | CachedQuery(CachedQuery&&) noexcept; | ||
| 123 | CachedQuery(const CachedQuery&) = delete; | ||
| 108 | ~CachedQuery(); | 124 | ~CachedQuery(); |
| 109 | 125 | ||
| 126 | CachedQuery& operator=(CachedQuery&&) noexcept; | ||
| 127 | |||
| 110 | /// Writes the counter value to host memory. | 128 | /// Writes the counter value to host memory. |
| 111 | void Flush(); | 129 | void Flush(); |
| 112 | 130 | ||
| 113 | /// Updates the counter this cached query registered in guest memory will write when requested. | 131 | /// Updates the counter this cached query registered in guest memory will write when requested. |
| 114 | void SetCounter(std::shared_ptr<HostCounter> counter); | 132 | void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp); |
| 133 | |||
| 134 | /// Returns true when a flushing this query will potentially wait for OpenGL. | ||
| 135 | bool WaitPending() const noexcept; | ||
| 115 | 136 | ||
| 116 | /// Returns the query type. | 137 | /// Returns the query type. |
| 117 | VideoCore::QueryType GetType() const; | 138 | VideoCore::QueryType GetType() const noexcept; |
| 118 | 139 | ||
| 119 | VAddr GetCpuAddr() const override { | 140 | /// Returns the guest CPU address for this query. |
| 120 | return cpu_addr; | 141 | VAddr GetCpuAddr() const noexcept; |
| 121 | } | ||
| 122 | 142 | ||
| 123 | std::size_t GetSizeInBytes() const override { | 143 | /// Returns the cache address for this query. |
| 124 | return sizeof(u64); | 144 | CacheAddr GetCacheAddr() const noexcept; |
| 125 | } | 145 | |
| 146 | /// Returns the number of cached bytes. | ||
| 147 | u64 GetSizeInBytes() const noexcept; | ||
| 126 | 148 | ||
| 127 | private: | 149 | private: |
| 128 | VideoCore::QueryType type; | 150 | VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed). |
| 129 | VAddr cpu_addr; ///< Guest CPU address. | 151 | VAddr cpu_addr; ///< Guest CPU address. |
| 130 | u8* host_ptr; ///< Writable host pointer. | 152 | u8* host_ptr; ///< Writable host pointer. |
| 131 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. | 153 | std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree. |
| 154 | std::optional<u64> timestamp; ///< Timestamp to flush to guest memory. | ||
| 132 | }; | 155 | }; |
| 133 | 156 | ||
| 134 | } // namespace OpenGL | 157 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 827f85884..4bdc8db85 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -722,8 +722,9 @@ void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | |||
| 722 | query_cache.ResetCounter(type); | 722 | query_cache.ResetCounter(type); |
| 723 | } | 723 | } |
| 724 | 724 | ||
| 725 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) { | 725 | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, |
| 726 | query_cache.Query(gpu_addr, type); | 726 | std::optional<u64> timestamp) { |
| 727 | query_cache.Query(gpu_addr, type, timestamp); | ||
| 727 | } | 728 | } |
| 728 | 729 | ||
| 729 | void RasterizerOpenGL::FlushAll() {} | 730 | void RasterizerOpenGL::FlushAll() {} |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4fb6811a7..c772fd4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -63,7 +63,7 @@ public: | |||
| 63 | void Clear() override; | 63 | void Clear() override; |
| 64 | void DispatchCompute(GPUVAddr code_addr) override; | 64 | void DispatchCompute(GPUVAddr code_addr) override; |
| 65 | void ResetCounter(VideoCore::QueryType type) override; | 65 | void ResetCounter(VideoCore::QueryType type) override; |
| 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override; | 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 67 | void FlushAll() override; | 67 | void FlushAll() override; |
| 68 | void FlushRegion(CacheAddr addr, u64 size) override; | 68 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 69 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 69 | void InvalidateRegion(CacheAddr addr, u64 size) override; |