summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-02-11 18:59:44 -0300
committerGravatar ReinUsesLisp2020-02-14 17:38:27 -0300
commitbcd348f2388cf944f2ac49364a8d13b47cc21456 (patch)
tree7aefb0077b4d8902bdab3f3026361173a71046e3 /src
parentquery_cache: Abstract OpenGL implementation (diff)
downloadyuzu-bcd348f2388cf944f2ac49364a8d13b47cc21456.tar.gz
yuzu-bcd348f2388cf944f2ac49364a8d13b47cc21456.tar.xz
yuzu-bcd348f2388cf944f2ac49364a8d13b47cc21456.zip
vk_query_cache: Implement generic query cache on Vulkan
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/query_cache.h37
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h11
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp122
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h104
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp21
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h15
11 files changed, 327 insertions, 20 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index bb5895e99..4b0c6346f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -180,6 +180,8 @@ if (ENABLE_VULKAN)
180 renderer_vulkan/vk_memory_manager.h 180 renderer_vulkan/vk_memory_manager.h
181 renderer_vulkan/vk_pipeline_cache.cpp 181 renderer_vulkan/vk_pipeline_cache.cpp
182 renderer_vulkan/vk_pipeline_cache.h 182 renderer_vulkan/vk_pipeline_cache.h
183 renderer_vulkan/vk_query_cache.cpp
184 renderer_vulkan/vk_query_cache.h
183 renderer_vulkan/vk_rasterizer.cpp 185 renderer_vulkan/vk_rasterizer.cpp
184 renderer_vulkan/vk_rasterizer.h 186 renderer_vulkan/vk_rasterizer.h
185 renderer_vulkan/vk_renderpass_cache.cpp 187 renderer_vulkan/vk_renderpass_cache.cpp
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 4c9151ce8..069032121 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -88,7 +88,8 @@ private:
88 std::shared_ptr<HostCounter> last; 88 std::shared_ptr<HostCounter> last;
89}; 89};
90 90
91template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> 91template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
92 class QueryPool>
92class QueryCacheBase { 93class QueryCacheBase {
93public: 94public:
94 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) 95 explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
@@ -127,15 +128,25 @@ public:
127 128
128 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 129 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
129 void UpdateCounters() { 130 void UpdateCounters() {
131 std::unique_lock lock{mutex};
130 const auto& regs = system.GPU().Maxwell3D().regs; 132 const auto& regs = system.GPU().Maxwell3D().regs;
131 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); 133 Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
132 } 134 }
133 135
134 /// Resets a counter to zero. It doesn't disable the query after resetting. 136 /// Resets a counter to zero. It doesn't disable the query after resetting.
135 void ResetCounter(VideoCore::QueryType type) { 137 void ResetCounter(VideoCore::QueryType type) {
138 std::unique_lock lock{mutex};
136 Stream(type).Reset(); 139 Stream(type).Reset();
137 } 140 }
138 141
142 /// Disable all active streams. Expected to be called at the end of a command buffer.
143 void DisableStreams() {
144 std::unique_lock lock{mutex};
145 for (auto& stream : streams) {
146 stream.Update(false);
147 }
148 }
149
139 /// Returns a new host counter. 150 /// Returns a new host counter.
140 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, 151 std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
141 VideoCore::QueryType type) { 152 VideoCore::QueryType type) {
@@ -148,6 +159,9 @@ public:
148 return streams[static_cast<std::size_t>(type)]; 159 return streams[static_cast<std::size_t>(type)];
149 } 160 }
150 161
162protected:
163 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
164
151private: 165private:
152 /// Flushes a memory range to guest memory and removes it from the cache. 166 /// Flushes a memory range to guest memory and removes it from the cache.
153 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { 167 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
@@ -213,8 +227,16 @@ private:
213template <class QueryCache, class HostCounter> 227template <class QueryCache, class HostCounter>
214class HostCounterBase { 228class HostCounterBase {
215public: 229public:
216 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency) 230 explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
217 : dependency{std::move(dependency)} {} 231 : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
232 // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
233 static constexpr u64 depth_threshold = 96;
234 if (depth > depth_threshold) {
235 depth = 0;
236 base_result = dependency->Query();
237 dependency = nullptr;
238 }
239 }
218 240
219 /// Returns the current value of the query. 241 /// Returns the current value of the query.
220 u64 Query() { 242 u64 Query() {
@@ -222,9 +244,10 @@ public:
222 return *result; 244 return *result;
223 } 245 }
224 246
225 u64 value = BlockingQuery(); 247 u64 value = BlockingQuery() + base_result;
226 if (dependency) { 248 if (dependency) {
227 value += dependency->Query(); 249 value += dependency->Query();
250 dependency = nullptr;
228 } 251 }
229 252
230 return *(result = value); 253 return *(result = value);
@@ -235,6 +258,10 @@ public:
235 return result.has_value(); 258 return result.has_value();
236 } 259 }
237 260
261 u64 Depth() const noexcept {
262 return depth;
263 }
264
238protected: 265protected:
239 /// Returns the value of query from the backend API blocking as needed. 266 /// Returns the value of query from the backend API blocking as needed.
240 virtual u64 BlockingQuery() const = 0; 267 virtual u64 BlockingQuery() const = 0;
@@ -242,6 +269,8 @@ protected:
242private: 269private:
243 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. 270 std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
244 std::optional<u64> result; ///< Filled with the already returned value. 271 std::optional<u64> result; ///< Filled with the already returned value.
272 u64 depth; ///< Number of nested dependencies.
273 u64 base_result = 0; ///< Equivalent to nested dependencies value.
245}; 274};
246 275
247template <class HostCounter> 276template <class HostCounter>
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index 7d5a044c7..f12e9f55f 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
31} // Anonymous namespace 31} // Anonymous namespace
32 32
33QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) 33QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
34 : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, 34 : VideoCommon::QueryCacheBase<
35 HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>( 35 QueryCache, CachedQuery, CounterStream, HostCounter,
36 gl_rasterizer)}, 36 std::vector<OGLQuery>>{system,
37 static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
37 gl_rasterizer{gl_rasterizer} {} 38 gl_rasterizer{gl_rasterizer} {}
38 39
39QueryCache::~QueryCache() = default; 40QueryCache::~QueryCache() = default;
40 41
41OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { 42OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
42 auto& reserve = queries_reserve[static_cast<std::size_t>(type)]; 43 auto& reserve = query_pools[static_cast<std::size_t>(type)];
43 OGLQuery query; 44 OGLQuery query;
44 if (reserve.empty()) { 45 if (reserve.empty()) {
45 query.Create(GetTarget(type)); 46 query.Create(GetTarget(type));
@@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
52} 53}
53 54
54void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { 55void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
55 queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query)); 56 query_pools[static_cast<std::size_t>(type)].push_back(std::move(query));
56} 57}
57 58
58bool QueryCache::AnyCommandQueued() const noexcept { 59bool QueryCache::AnyCommandQueued() const noexcept {
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 20d337f15..99d187837 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -6,12 +6,8 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
10#include <unordered_map>
11#include <vector> 9#include <vector>
12 10
13#include <glad/glad.h>
14
15#include "common/common_types.h" 11#include "common/common_types.h"
16#include "video_core/query_cache.h" 12#include "video_core/query_cache.h"
17#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
@@ -30,8 +26,8 @@ class RasterizerOpenGL;
30 26
31using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; 27using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
32 28
33class QueryCache final 29class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
34 : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { 30 HostCounter, std::vector<OGLQuery>> {
35public: 31public:
36 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); 32 explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
37 ~QueryCache(); 33 ~QueryCache();
@@ -44,7 +40,6 @@ public:
44 40
45private: 41private:
46 RasterizerOpenGL& gl_rasterizer; 42 RasterizerOpenGL& gl_rasterizer;
47 std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
48}; 43};
49 44
50class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { 45class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@@ -59,7 +54,7 @@ private:
59 u64 BlockingQuery() const override; 54 u64 BlockingQuery() const override;
60 55
61 QueryCache& cache; 56 QueryCache& cache;
62 VideoCore::QueryType type; 57 const VideoCore::QueryType type;
63 OGLQuery query; 58 OGLQuery query;
64}; 59};
65 60
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 9840f26e5..588a6835f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
104 features.depthBiasClamp = true; 104 features.depthBiasClamp = true;
105 features.geometryShader = true; 105 features.geometryShader = true;
106 features.tessellationShader = true; 106 features.tessellationShader = true;
107 features.occlusionQueryPrecise = true;
107 features.fragmentStoresAndAtomics = true; 108 features.fragmentStoresAndAtomics = true;
108 features.shaderImageGatherExtended = true; 109 features.shaderImageGatherExtended = true;
109 features.shaderStorageImageWriteWithoutFormat = true; 110 features.shaderStorageImageWriteWithoutFormat = true;
@@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
117 bit8_storage.uniformAndStorageBuffer8BitAccess = true; 118 bit8_storage.uniformAndStorageBuffer8BitAccess = true;
118 SetNext(next, bit8_storage); 119 SetNext(next, bit8_storage);
119 120
121 vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
122 host_query_reset.hostQueryReset = true;
123 SetNext(next, host_query_reset);
124
120 vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 125 vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
121 if (is_float16_supported) { 126 if (is_float16_supported) {
122 float16_int8.shaderFloat16 = true; 127 float16_int8.shaderFloat16 = true;
@@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
273 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, 278 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
274 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, 279 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
275 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, 280 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
281 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
276 }; 282 };
277 std::bitset<required_extensions.size()> available_extensions{}; 283 std::bitset<required_extensions.size()> available_extensions{};
278 284
@@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
340 std::make_pair(features.depthBiasClamp, "depthBiasClamp"), 346 std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
341 std::make_pair(features.geometryShader, "geometryShader"), 347 std::make_pair(features.geometryShader, "geometryShader"),
342 std::make_pair(features.tessellationShader, "tessellationShader"), 348 std::make_pair(features.tessellationShader, "tessellationShader"),
349 std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
343 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), 350 std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
344 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), 351 std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
345 std::make_pair(features.shaderStorageImageWriteWithoutFormat, 352 std::make_pair(features.shaderStorageImageWriteWithoutFormat,
@@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
376 } 383 }
377 }; 384 };
378 385
379 extensions.reserve(13); 386 extensions.reserve(14);
380 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 387 extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
381 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); 388 extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
382 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); 389 extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
384 extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); 391 extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
385 extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); 392 extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
386 extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); 393 extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
394 extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
387 395
388 [[maybe_unused]] const bool nsight = 396 [[maybe_unused]] const bool nsight =
389 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); 397 std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
new file mode 100644
index 000000000..ffbf60dda
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -0,0 +1,122 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <cstddef>
7#include <cstdint>
8#include <utility>
9#include <vector>
10
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h"
16
17namespace Vulkan {
18
19namespace {
20
21constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
22
23constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
24 return QUERY_TARGETS[static_cast<std::size_t>(type)];
25}
26
27} // Anonymous namespace
28
29QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
30
31QueryPool::~QueryPool() = default;
32
33void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
34 device = &device_;
35 type = type_;
36}
37
38std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
39 std::size_t index;
40 do {
41 index = CommitResource(fence);
42 } while (usage[index]);
43 usage[index] = true;
44
45 return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)};
46}
47
48void QueryPool::Allocate(std::size_t begin, std::size_t end) {
49 usage.resize(end);
50
51 const auto dev = device->GetLogical();
52 const u32 size = static_cast<u32>(end - begin);
53 const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
54 pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
55}
56
57void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
58 const auto it =
59 std::find_if(std::begin(pools), std::end(pools),
60 [query_pool = query.first](auto& pool) { return query_pool == *pool; });
61 ASSERT(it != std::end(pools));
62
63 const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
64 usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
65}
66
67VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
68 const VKDevice& device, VKScheduler& scheduler)
69 : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
70 QueryPool>{system, rasterizer},
71 device{device}, scheduler{scheduler} {
72 for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
73 query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
74 }
75}
76
77VKQueryCache::~VKQueryCache() = default;
78
79std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
80 return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
81}
82
83void VKQueryCache::Reserve(VideoCore::QueryType type,
84 std::pair<vk::QueryPool, std::uint32_t> query) {
85 query_pools[static_cast<std::size_t>(type)].Reserve(query);
86}
87
88HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
89 VideoCore::QueryType type)
90 : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
91 type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
92 const auto dev = cache.Device().GetLogical();
93 cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) {
94 dev.resetQueryPoolEXT(query.first, query.second, 1, dld);
95 cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld);
96 });
97}
98
99HostCounter::~HostCounter() {
100 cache.Reserve(type, query);
101}
102
103void HostCounter::EndQuery() {
104 cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
105 cmdbuf.endQuery(query.first, query.second, dld);
106 });
107}
108
109u64 HostCounter::BlockingQuery() const {
110 if (ticks >= cache.Scheduler().Ticks()) {
111 cache.Scheduler().Flush();
112 }
113
114 const auto dev = cache.Device().GetLogical();
115 const auto& dld = cache.Device().GetDispatchLoader();
116 u64 value;
117 dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
118 vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
119 return value;
120}
121
122} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
new file mode 100644
index 000000000..c3092ee96
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -0,0 +1,104 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstddef>
8#include <cstdint>
9#include <memory>
10#include <utility>
11#include <vector>
12
13#include "common/common_types.h"
14#include "video_core/query_cache.h"
15#include "video_core/renderer_vulkan/declarations.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h"
17
18namespace VideoCore {
19class RasterizerInterface;
20}
21
22namespace Vulkan {
23
24class CachedQuery;
25class HostCounter;
26class VKDevice;
27class VKQueryCache;
28class VKScheduler;
29
30using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
31
32class QueryPool final : public VKFencedPool {
33public:
34 explicit QueryPool();
35 ~QueryPool() override;
36
37 void Initialize(const VKDevice& device, VideoCore::QueryType type);
38
39 std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
40
41 void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
42
43protected:
44 void Allocate(std::size_t begin, std::size_t end) override;
45
46private:
47 static constexpr std::size_t GROW_STEP = 512;
48
49 const VKDevice* device = nullptr;
50 VideoCore::QueryType type = {};
51
52 std::vector<UniqueQueryPool> pools;
53 std::vector<bool> usage;
54};
55
56class VKQueryCache final
57 : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
58 QueryPool> {
59public:
60 explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
61 const VKDevice& device, VKScheduler& scheduler);
62 ~VKQueryCache();
63
64 std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
65
66 void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
67
68 const VKDevice& Device() const noexcept {
69 return device;
70 }
71
72 VKScheduler& Scheduler() const noexcept {
73 return scheduler;
74 }
75
76private:
77 const VKDevice& device;
78 VKScheduler& scheduler;
79};
80
81class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
82public:
83 explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
84 VideoCore::QueryType type);
85 ~HostCounter();
86
87 void EndQuery();
88
89private:
90 u64 BlockingQuery() const override;
91
92 VKQueryCache& cache;
93 const VideoCore::QueryType type;
94 const std::pair<vk::QueryPool, std::uint32_t> query;
95 const u64 ticks;
96};
97
98class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
99public:
100 explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
101 : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
102};
103
104} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index aada38702..79aa121ed 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
289 staging_pool), 289 staging_pool),
290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), 290 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 291 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
292 sampler_cache(device) {} 292 sampler_cache(device), query_cache(system, *this, device, scheduler) {
293 scheduler.SetQueryCache(query_cache);
294}
293 295
294RasterizerVulkan::~RasterizerVulkan() = default; 296RasterizerVulkan::~RasterizerVulkan() = default;
295 297
@@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
308 310
309 FlushWork(); 311 FlushWork();
310 312
313 query_cache.UpdateCounters();
314
311 const auto& gpu = system.GPU().Maxwell3D(); 315 const auto& gpu = system.GPU().Maxwell3D();
312 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; 316 GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
313 317
@@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
362void RasterizerVulkan::Clear() { 366void RasterizerVulkan::Clear() {
363 MICROPROFILE_SCOPE(Vulkan_Clearing); 367 MICROPROFILE_SCOPE(Vulkan_Clearing);
364 368
369 query_cache.UpdateCounters();
370
365 const auto& gpu = system.GPU().Maxwell3D(); 371 const auto& gpu = system.GPU().Maxwell3D();
366 if (!system.GPU().Maxwell3D().ShouldExecute()) { 372 if (!system.GPU().Maxwell3D().ShouldExecute()) {
367 return; 373 return;
@@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
429 sampled_views.clear(); 435 sampled_views.clear();
430 image_views.clear(); 436 image_views.clear();
431 437
438 query_cache.UpdateCounters();
439
432 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 440 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
433 const ComputePipelineCacheKey key{ 441 const ComputePipelineCacheKey key{
434 code_addr, 442 code_addr,
@@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
471 }); 479 });
472} 480}
473 481
482void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
483 query_cache.ResetCounter(type);
484}
485
486void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
487 std::optional<u64> timestamp) {
488 query_cache.Query(gpu_addr, type, timestamp);
489}
490
474void RasterizerVulkan::FlushAll() {} 491void RasterizerVulkan::FlushAll() {}
475 492
476void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { 493void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
477 texture_cache.FlushRegion(addr, size); 494 texture_cache.FlushRegion(addr, size);
478 buffer_cache.FlushRegion(addr, size); 495 buffer_cache.FlushRegion(addr, size);
496 query_cache.FlushRegion(addr, size);
479} 497}
480 498
481void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { 499void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
482 texture_cache.InvalidateRegion(addr, size); 500 texture_cache.InvalidateRegion(addr, size);
483 pipeline_cache.InvalidateRegion(addr, size); 501 pipeline_cache.InvalidateRegion(addr, size);
484 buffer_cache.InvalidateRegion(addr, size); 502 buffer_cache.InvalidateRegion(addr, size);
503 query_cache.InvalidateRegion(addr, size);
485} 504}
486 505
487void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 506void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 7be71e734..add1ad88c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -24,6 +24,7 @@
24#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 24#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
25#include "video_core/renderer_vulkan/vk_memory_manager.h" 25#include "video_core/renderer_vulkan/vk_memory_manager.h"
26#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
27#include "video_core/renderer_vulkan/vk_query_cache.h"
27#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 28#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
28#include "video_core/renderer_vulkan/vk_resource_manager.h" 29#include "video_core/renderer_vulkan/vk_resource_manager.h"
29#include "video_core/renderer_vulkan/vk_sampler_cache.h" 30#include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -96,7 +97,7 @@ struct ImageView {
96 vk::ImageLayout* layout = nullptr; 97 vk::ImageLayout* layout = nullptr;
97}; 98};
98 99
99class RasterizerVulkan : public VideoCore::RasterizerAccelerated { 100class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
100public: 101public:
101 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, 102 explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
102 VKScreenInfo& screen_info, const VKDevice& device, 103 VKScreenInfo& screen_info, const VKDevice& device,
@@ -108,6 +109,8 @@ public:
108 bool DrawMultiBatch(bool is_indexed) override; 109 bool DrawMultiBatch(bool is_indexed) override;
109 void Clear() override; 110 void Clear() override;
110 void DispatchCompute(GPUVAddr code_addr) override; 111 void DispatchCompute(GPUVAddr code_addr) override;
112 void ResetCounter(VideoCore::QueryType type) override;
113 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
111 void FlushAll() override; 114 void FlushAll() override;
112 void FlushRegion(CacheAddr addr, u64 size) override; 115 void FlushRegion(CacheAddr addr, u64 size) override;
113 void InvalidateRegion(CacheAddr addr, u64 size) override; 116 void InvalidateRegion(CacheAddr addr, u64 size) override;
@@ -247,6 +250,7 @@ private:
247 VKPipelineCache pipeline_cache; 250 VKPipelineCache pipeline_cache;
248 VKBufferCache buffer_cache; 251 VKBufferCache buffer_cache;
249 VKSamplerCache sampler_cache; 252 VKSamplerCache sampler_cache;
253 VKQueryCache query_cache;
250 254
251 std::array<View, Maxwell::NumRenderTargets> color_attachments; 255 std::array<View, Maxwell::NumRenderTargets> color_attachments;
252 View zeta_attachment; 256 View zeta_attachment;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index d66133ad1..92bd6c344 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "video_core/renderer_vulkan/declarations.h" 7#include "video_core/renderer_vulkan/declarations.h"
8#include "video_core/renderer_vulkan/vk_device.h" 8#include "video_core/renderer_vulkan/vk_device.h"
9#include "video_core/renderer_vulkan/vk_query_cache.h"
9#include "video_core/renderer_vulkan/vk_resource_manager.h" 10#include "video_core/renderer_vulkan/vk_resource_manager.h"
10#include "video_core/renderer_vulkan/vk_scheduler.h" 11#include "video_core/renderer_vulkan/vk_scheduler.h"
11 12
@@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
139} 140}
140 141
141void VKScheduler::AllocateNewContext() { 142void VKScheduler::AllocateNewContext() {
143 ++ticks;
144
142 std::unique_lock lock{mutex}; 145 std::unique_lock lock{mutex};
143 current_fence = next_fence; 146 current_fence = next_fence;
144 next_fence = &resource_manager.CommitFence(); 147 next_fence = &resource_manager.CommitFence();
@@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
146 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); 149 current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
147 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, 150 current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
148 device.GetDispatchLoader()); 151 device.GetDispatchLoader());
152 // Enable counters once again. These are disabled when a command buffer is finished.
153 if (query_cache) {
154 query_cache->UpdateCounters();
155 }
149} 156}
150 157
151void VKScheduler::InvalidateState() { 158void VKScheduler::InvalidateState() {
@@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
159} 166}
160 167
161void VKScheduler::EndPendingOperations() { 168void VKScheduler::EndPendingOperations() {
169 query_cache->DisableStreams();
162 EndRenderPass(); 170 EndRenderPass();
163} 171}
164 172
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index bcdffbba0..62fd7858b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <condition_variable> 8#include <condition_variable>
8#include <memory> 9#include <memory>
9#include <optional> 10#include <optional>
@@ -18,6 +19,7 @@ namespace Vulkan {
18 19
19class VKDevice; 20class VKDevice;
20class VKFence; 21class VKFence;
22class VKQueryCache;
21class VKResourceManager; 23class VKResourceManager;
22 24
23class VKFenceView { 25class VKFenceView {
@@ -67,6 +69,11 @@ public:
67 /// Binds a pipeline to the current execution context. 69 /// Binds a pipeline to the current execution context.
68 void BindGraphicsPipeline(vk::Pipeline pipeline); 70 void BindGraphicsPipeline(vk::Pipeline pipeline);
69 71
72 /// Assigns the query cache.
73 void SetQueryCache(VKQueryCache& query_cache_) {
74 query_cache = &query_cache_;
75 }
76
70 /// Returns true when viewports have been set in the current command buffer. 77 /// Returns true when viewports have been set in the current command buffer.
71 bool TouchViewports() { 78 bool TouchViewports() {
72 return std::exchange(state.viewports, true); 79 return std::exchange(state.viewports, true);
@@ -112,6 +119,11 @@ public:
112 return current_fence; 119 return current_fence;
113 } 120 }
114 121
122 /// Returns the current command buffer tick.
123 u64 Ticks() const {
124 return ticks;
125 }
126
115private: 127private:
116 class Command { 128 class Command {
117 public: 129 public:
@@ -205,6 +217,8 @@ private:
205 217
206 const VKDevice& device; 218 const VKDevice& device;
207 VKResourceManager& resource_manager; 219 VKResourceManager& resource_manager;
220 VKQueryCache* query_cache = nullptr;
221
208 vk::CommandBuffer current_cmdbuf; 222 vk::CommandBuffer current_cmdbuf;
209 VKFence* current_fence = nullptr; 223 VKFence* current_fence = nullptr;
210 VKFence* next_fence = nullptr; 224 VKFence* next_fence = nullptr;
@@ -227,6 +241,7 @@ private:
227 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; 241 Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
228 std::mutex mutex; 242 std::mutex mutex;
229 std::condition_variable cv; 243 std::condition_variable cv;
244 std::atomic<u64> ticks = 0;
230 bool quit = false; 245 bool quit = false;
231}; 246};
232 247