diff options
| author | 2023-08-04 03:31:52 +0200 | |
|---|---|---|
| committer | 2023-09-23 23:05:29 +0200 | |
| commit | bdc01254a9b3ce8359f8f007c2102cb2d112418e (patch) | |
| tree | b75b974c0751f83089d64957df567e0d138981b1 /src/video_core/query_cache | |
| parent | Merge pull request #11567 from liamwhite/fixing-my-error (diff) | |
| download | yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.gz yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.tar.xz yuzu-bdc01254a9b3ce8359f8f007c2102cb2d112418e.zip | |
Query Cache: Setup Base rework
Diffstat (limited to 'src/video_core/query_cache')
| -rw-r--r-- | src/video_core/query_cache/bank_base.h | 106 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_base.h | 72 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache.h | 543 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache_base.h | 181 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_stream.h | 125 | ||||
| -rw-r--r-- | src/video_core/query_cache/types.h | 74 |
6 files changed, 1101 insertions, 0 deletions
diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h new file mode 100644 index 000000000..4246a609d --- /dev/null +++ b/src/video_core/query_cache/bank_base.h | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <atomic> | ||
| 7 | #include <deque> | ||
| 8 | #include <utility> | ||
| 9 | |||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | class BankBase { | ||
| 16 | protected: | ||
| 17 | const size_t base_bank_size; | ||
| 18 | size_t bank_size; | ||
| 19 | std::atomic<size_t> references; | ||
| 20 | size_t current_slot; | ||
| 21 | |||
| 22 | public: | ||
| 23 | BankBase(size_t bank_size_) | ||
| 24 | : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} | ||
| 25 | |||
| 26 | virtual ~BankBase() = default; | ||
| 27 | |||
| 28 | virtual std::pair<bool, size_t> Reserve() { | ||
| 29 | if (IsClosed()) { | ||
| 30 | return {false, bank_size}; | ||
| 31 | } | ||
| 32 | const size_t result = current_slot++; | ||
| 33 | return {true, result}; | ||
| 34 | } | ||
| 35 | |||
| 36 | virtual void Reset() { | ||
| 37 | current_slot = 0; | ||
| 38 | references = 0; | ||
| 39 | bank_size = base_bank_size; | ||
| 40 | } | ||
| 41 | |||
| 42 | size_t Size() const { | ||
| 43 | return bank_size; | ||
| 44 | } | ||
| 45 | |||
| 46 | void AddReference(size_t how_many = 1) { | ||
| 47 | references.fetch_add(how_many, std::memory_order_relaxed); | ||
| 48 | } | ||
| 49 | |||
| 50 | void CloseReference(size_t how_many = 1) { | ||
| 51 | if (how_many > references.load(std::memory_order_relaxed)) { | ||
| 52 | UNREACHABLE(); | ||
| 53 | } | ||
| 54 | references.fetch_sub(how_many, std::memory_order_relaxed); | ||
| 55 | } | ||
| 56 | |||
| 57 | void Close() { | ||
| 58 | bank_size = current_slot; | ||
| 59 | } | ||
| 60 | |||
| 61 | constexpr bool IsClosed() { | ||
| 62 | return current_slot >= bank_size; | ||
| 63 | } | ||
| 64 | |||
| 65 | bool IsDead() { | ||
| 66 | return IsClosed() && references == 0; | ||
| 67 | } | ||
| 68 | }; | ||
| 69 | |||
/// Pool of banks that recycles dead banks before growing.
///
/// `BankType` must provide `IsDead()` and `Reset()`. Banks are stored in a
/// deque, and `bank_indices` lists the banks in the order they were last
/// handed out, so the front entry is the oldest one.
template <typename BankType>
class BankPool {
private:
    std::deque<BankType> bank_pool;
    std::deque<size_t> bank_indices;

public:
    BankPool() = default;
    ~BankPool() = default;

    /// Reserve a bank from the pool and return its index.
    ///
    /// If the oldest bank is dead it is reset and reused; otherwise
    /// `builder(bank_pool, new_index)` is invoked and is expected to append
    /// exactly one bank at `new_index`.
    template <typename Func>
    size_t ReserveBank(Func&& builder) {
        if (!bank_indices.empty() && bank_pool[bank_indices.front()].IsDead()) {
            const size_t recycled_index = bank_indices.front();
            bank_indices.pop_front();
            bank_pool[recycled_index].Reset();
            // Re-queue the bank so it can be recycled again once it dies; without
            // this each bank would be reused at most once and the pool would
            // keep growing.
            bank_indices.push_back(recycled_index);
            return recycled_index;
        }
        const size_t new_index = bank_pool.size();
        builder(bank_pool, new_index);
        bank_indices.push_back(new_index);
        return new_index;
    }

    /// Get a reference to a bank using its index (no bounds checking).
    BankType& GetBank(size_t index) {
        return bank_pool[index];
    }

    /// Get the total number of banks in the pool.
    size_t BankCount() const {
        return bank_pool.size();
    }
};
| 105 | |||
| 106 | } // namespace VideoCommon | ||
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h new file mode 100644 index 000000000..485ed669c --- /dev/null +++ b/src/video_core/query_cache/query_base.h | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/common_funcs.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
/// Bit flags describing the state of a query (combined through the flag
/// operators declared right after this enum).
enum class QueryFlagBits : u32 {
    HasTimestamp = 1 << 0,       ///< Indicates if this query has a timestamp.
    IsFinalValueSynced = 1 << 1, ///< Indicates if the final value of the query is available.
    IsHostSynced = 1 << 2,       ///< Indicates if the query has been synced in the host.
    IsGuestSynced = 1 << 3,      ///< Indicates if the query has been synced with the guest.
    IsHostManaged = 1 << 4,      ///< Indicates if this query points to a host query.
    IsRewritten = 1 << 5,        ///< Indicates if this query was rewritten by another query.
    IsInvalidated = 1 << 6,      ///< Indicates the value of the query has been nullified.
    IsOrphan = 1 << 7,           ///< Indicates the query has not been set by a guest query.
    IsFence = 1 << 8,            ///< Indicates the query is a fence.
};
| 22 | DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) | ||
| 23 | |||
| 24 | class QueryBase { | ||
| 25 | public: | ||
| 26 | VAddr guest_address; | ||
| 27 | QueryFlagBits flags; | ||
| 28 | u64 value; | ||
| 29 | |||
| 30 | protected: | ||
| 31 | // Default constructor | ||
| 32 | QueryBase() : guest_address(0), flags{}, value{} {} | ||
| 33 | |||
| 34 | // Parameterized constructor | ||
| 35 | QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) | ||
| 36 | : guest_address(address), flags(flags_), value{value_} {} | ||
| 37 | }; | ||
| 38 | |||
| 39 | class GuestQuery : public QueryBase { | ||
| 40 | public: | ||
| 41 | // Parameterized constructor | ||
| 42 | GuestQuery(bool isLong, VAddr address, u64 queryValue) | ||
| 43 | : QueryBase(address, QueryFlagBits::IsFinalValueSynced, queryValue) { | ||
| 44 | if (isLong) { | ||
| 45 | flags |= QueryFlagBits::HasTimestamp; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | }; | ||
| 49 | |||
| 50 | class HostQueryBase : public QueryBase { | ||
| 51 | public: | ||
| 52 | // Default constructor | ||
| 53 | HostQueryBase() | ||
| 54 | : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, | ||
| 55 | size_banks{}, start_slot{}, size_slots{} {} | ||
| 56 | |||
| 57 | // Parameterized constructor | ||
| 58 | HostQueryBase(bool isLong, VAddr address) | ||
| 59 | : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, | ||
| 60 | start_slot{}, size_slots{} { | ||
| 61 | if (isLong) { | ||
| 62 | flags |= QueryFlagBits::HasTimestamp; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | u32 start_bank_id; | ||
| 67 | u32 size_banks; | ||
| 68 | size_t start_slot; | ||
| 69 | size_t size_slots; | ||
| 70 | }; | ||
| 71 | |||
| 72 | } // namespace VideoCommon \ No newline at end of file | ||
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h new file mode 100644 index 000000000..f6af48d14 --- /dev/null +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -0,0 +1,543 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <deque> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/assert.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/logging/log.h" | ||
| 16 | #include "common/scope_exit.h" | ||
| 17 | #include "common/settings.h" | ||
| 18 | #include "core/memory.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/gpu.h" | ||
| 21 | #include "video_core/memory_manager.h" | ||
| 22 | #include "video_core/query_cache/bank_base.h" | ||
| 23 | #include "video_core/query_cache/query_base.h" | ||
| 24 | #include "video_core/query_cache/query_cache_base.h" | ||
| 25 | #include "video_core/query_cache/query_stream.h" | ||
| 26 | #include "video_core/query_cache/types.h" | ||
| 27 | |||
| 28 | namespace VideoCommon { | ||
| 29 | |||
| 30 | using Maxwell = Tegra::Engines::Maxwell3D; | ||
| 31 | |||
/// One pending write handed to the runtime's SyncValues() by
/// GuestStreamer::SyncWrites().
struct SyncValuesStruct {
    VAddr address; ///< Guest address of the query being written.
    u64 value;     ///< Value of the query.
    u64 size;      ///< Number of bytes to write: 8 for queries with a timestamp, 4 otherwise.

    // NOTE(review): consumed by the runtime's SyncValues implementation, which is
    // not visible here — confirm the exact meaning there.
    static constexpr bool GeneratesBaseBuffer = true;
};
| 39 | |||
| 40 | template <typename Traits> | ||
| 41 | class GuestStreamer : public SimpleStreamer<GuestQuery> { | ||
| 42 | public: | ||
| 43 | using RuntimeType = typename Traits::RuntimeType; | ||
| 44 | |||
| 45 | GuestStreamer(size_t id_, RuntimeType& runtime_) | ||
| 46 | : SimpleStreamer<GuestQuery>(id_), runtime{runtime_} {} | ||
| 47 | |||
| 48 | virtual ~GuestStreamer() = default; | ||
| 49 | |||
| 50 | size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 51 | std::optional<u32> subreport = std::nullopt) override { | ||
| 52 | auto new_id = BuildQuery(has_timestamp, address, static_cast<u64>(value)); | ||
| 53 | pending_sync.push_back(new_id); | ||
| 54 | return new_id; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool HasPendingSync() override { | ||
| 58 | return !pending_sync.empty(); | ||
| 59 | } | ||
| 60 | |||
| 61 | void SyncWrites() override { | ||
| 62 | if (pending_sync.empty()) { | ||
| 63 | return; | ||
| 64 | } | ||
| 65 | std::vector<SyncValuesStruct> sync_values; | ||
| 66 | sync_values.reserve(pending_sync.size()); | ||
| 67 | for (size_t pending_id : pending_sync) { | ||
| 68 | auto& query = slot_queries[pending_id]; | ||
| 69 | if (True(query.flags & QueryFlagBits::IsRewritten) || | ||
| 70 | True(query.flags & QueryFlagBits::IsInvalidated)) { | ||
| 71 | continue; | ||
| 72 | } | ||
| 73 | query.flags |= QueryFlagBits::IsHostSynced; | ||
| 74 | sync_values.emplace_back(query.guest_address, query.value, | ||
| 75 | True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); | ||
| 76 | } | ||
| 77 | pending_sync.clear(); | ||
| 78 | if (sync_values.size() > 0) { | ||
| 79 | runtime.template SyncValues<SyncValuesStruct>(sync_values); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | private: | ||
| 84 | RuntimeType& runtime; | ||
| 85 | std::deque<size_t> pending_sync; | ||
| 86 | }; | ||
| 87 | |||
| 88 | template <typename Traits> | ||
| 89 | class StubStreamer : public GuestStreamer<Traits> { | ||
| 90 | public: | ||
| 91 | using RuntimeType = typename Traits::RuntimeType; | ||
| 92 | |||
| 93 | StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} | ||
| 94 | |||
| 95 | ~StubStreamer() override = default; | ||
| 96 | |||
| 97 | size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, | ||
| 98 | std::optional<u32> subreport = std::nullopt) override { | ||
| 99 | size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); | ||
| 100 | return new_id; | ||
| 101 | } | ||
| 102 | }; | ||
| 103 | |||
| 104 | template <typename Traits> | ||
| 105 | struct QueryCacheBase<Traits>::QueryCacheBaseImpl { | ||
| 106 | using RuntimeType = typename Traits::RuntimeType; | ||
| 107 | |||
| 108 | QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 109 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_, Tegra::GPU& gpu_) | ||
| 110 | : owner{owner_}, rasterizer{rasterizer_}, | ||
| 111 | cpu_memory{cpu_memory_}, runtime{runtime_}, gpu{gpu_} { | ||
| 112 | streamer_mask = 0; | ||
| 113 | for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { | ||
| 114 | streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); | ||
| 115 | if (streamers[i]) { | ||
| 116 | streamer_mask |= 1ULL << i; | ||
| 117 | } | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | template <typename Func> | ||
| 122 | void ForEachStreamerIn(u64 mask, Func&& func) { | ||
| 123 | static constexpr bool RETURNS_BOOL = | ||
| 124 | std::is_same_v<std::invoke_result<Func, StreamerInterface*>, bool>; | ||
| 125 | while (mask != 0) { | ||
| 126 | size_t position = std::countr_zero(mask); | ||
| 127 | mask &= ~(1ULL << position); | ||
| 128 | if constexpr (RETURNS_BOOL) { | ||
| 129 | if (func(streamers[position])) { | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | } else { | ||
| 133 | func(streamers[position]); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | template <typename Func> | ||
| 139 | void ForEachStreamer(Func&& func) { | ||
| 140 | ForEachStreamerIn(streamer_mask, func); | ||
| 141 | } | ||
| 142 | |||
| 143 | QueryBase* ObtainQuery(QueryCacheBase<Traits>::QueryLocation location) { | ||
| 144 | size_t which_stream = location.stream_id.Value(); | ||
| 145 | auto* streamer = streamers[which_stream]; | ||
| 146 | if (!streamer) { | ||
| 147 | return nullptr; | ||
| 148 | } | ||
| 149 | return streamer->GetQuery(location.query_id.Value()); | ||
| 150 | } | ||
| 151 | |||
| 152 | QueryCacheBase<Traits>* owner; | ||
| 153 | VideoCore::RasterizerInterface& rasterizer; | ||
| 154 | Core::Memory::Memory& cpu_memory; | ||
| 155 | Traits::RuntimeType& runtime; | ||
| 156 | Tegra::GPU& gpu; | ||
| 157 | std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; | ||
| 158 | u64 streamer_mask; | ||
| 159 | std::mutex flush_guard; | ||
| 160 | std::deque<u64> flushes_pending; | ||
| 161 | std::vector<QueryCacheBase<Traits>::QueryLocation> pending_unregister; | ||
| 162 | }; | ||
| 163 | |||
| 164 | template <typename Traits> | ||
| 165 | QueryCacheBase<Traits>::QueryCacheBase(Tegra::GPU& gpu_, | ||
| 166 | VideoCore::RasterizerInterface& rasterizer_, | ||
| 167 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_) | ||
| 168 | : cached_queries{} { | ||
| 169 | impl = std::make_unique<QueryCacheBase<Traits>::QueryCacheBaseImpl>( | ||
| 170 | this, rasterizer_, cpu_memory_, runtime_, gpu_); | ||
| 171 | } | ||
| 172 | |||
// Defaulted destructor, defined in this file after the definition of
// QueryCacheBaseImpl (the type of the object `impl` points to).
template <typename Traits>
QueryCacheBase<Traits>::~QueryCacheBase() = default;
| 175 | |||
| 176 | template <typename Traits> | ||
| 177 | void QueryCacheBase<Traits>::CounterEnable(QueryType counter_type, bool is_enabled) { | ||
| 178 | size_t index = static_cast<size_t>(counter_type); | ||
| 179 | StreamerInterface* streamer = impl->streamers[index]; | ||
| 180 | if (!streamer) [[unlikely]] { | ||
| 181 | UNREACHABLE(); | ||
| 182 | return; | ||
| 183 | } | ||
| 184 | if (is_enabled) { | ||
| 185 | streamer->StartCounter(); | ||
| 186 | } else { | ||
| 187 | streamer->PauseCounter(); | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | template <typename Traits> | ||
| 192 | void QueryCacheBase<Traits>::CounterClose(QueryType counter_type) { | ||
| 193 | size_t index = static_cast<size_t>(counter_type); | ||
| 194 | StreamerInterface* streamer = impl->streamers[index]; | ||
| 195 | if (!streamer) [[unlikely]] { | ||
| 196 | UNREACHABLE(); | ||
| 197 | return; | ||
| 198 | } | ||
| 199 | streamer->CloseCounter(); | ||
| 200 | } | ||
| 201 | |||
| 202 | template <typename Traits> | ||
| 203 | void QueryCacheBase<Traits>::CounterReset(QueryType counter_type) { | ||
| 204 | size_t index = static_cast<size_t>(counter_type); | ||
| 205 | StreamerInterface* streamer = impl->streamers[index]; | ||
| 206 | if (!streamer) [[unlikely]] { | ||
| 207 | UNIMPLEMENTED(); | ||
| 208 | return; | ||
| 209 | } | ||
| 210 | streamer->ResetCounter(); | ||
| 211 | } | ||
| 212 | |||
| 213 | template <typename Traits> | ||
| 214 | void QueryCacheBase<Traits>::BindToChannel(s32 id) { | ||
| 215 | VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>::BindToChannel(id); | ||
| 216 | impl->runtime.Bind3DEngine(maxwell3d); | ||
| 217 | } | ||
| 218 | |||
| 219 | template <typename Traits> | ||
| 220 | void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type, | ||
| 221 | QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||
| 222 | const bool has_timestamp = True(flags & QueryPropertiesFlags::HasTimeout); | ||
| 223 | const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); | ||
| 224 | size_t streamer_id = static_cast<size_t>(counter_type); | ||
| 225 | auto* streamer = impl->streamers[streamer_id]; | ||
| 226 | if (!streamer) [[unlikely]] { | ||
| 227 | if (has_timestamp) { | ||
| 228 | u64 timestamp = impl->gpu.GetTicks(); | ||
| 229 | gpu_memory->Write<u64>(addr + 8, timestamp); | ||
| 230 | gpu_memory->Write<u64>(addr, 1ULL); | ||
| 231 | } else { | ||
| 232 | gpu_memory->Write<u32>(addr, 1U); | ||
| 233 | } | ||
| 234 | return; | ||
| 235 | } | ||
| 236 | auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); | ||
| 237 | if (!cpu_addr_opt) [[unlikely]] { | ||
| 238 | return; | ||
| 239 | } | ||
| 240 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 241 | const size_t new_query_id = streamer->WriteCounter(cpu_addr, has_timestamp, payload, subreport); | ||
| 242 | auto* query = streamer->GetQuery(new_query_id); | ||
| 243 | if (is_fence) { | ||
| 244 | query->flags |= QueryFlagBits::IsFence; | ||
| 245 | } | ||
| 246 | QueryLocation query_location{}; | ||
| 247 | query_location.stream_id.Assign(static_cast<u32>(streamer_id)); | ||
| 248 | query_location.query_id.Assign(static_cast<u32>(new_query_id)); | ||
| 249 | const auto gen_caching_indexing = [](VAddr cur_addr) { | ||
| 250 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||
| 251 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||
| 252 | }; | ||
| 253 | u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); | ||
| 254 | u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); | ||
| 255 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | ||
| 256 | std::function<void()> operation( | ||
| 257 | [this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { | ||
| 258 | if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { | ||
| 259 | if (!is_synced) [[likely]] { | ||
| 260 | impl->pending_unregister.push_back(query_location); | ||
| 261 | } | ||
| 262 | return; | ||
| 263 | } | ||
| 264 | if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { | ||
| 265 | UNREACHABLE(); | ||
| 266 | return; | ||
| 267 | } | ||
| 268 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||
| 269 | u64 timestamp = impl->gpu.GetTicks(); | ||
| 270 | std::memcpy(pointer_timestamp, ×tamp, sizeof(timestamp)); | ||
| 271 | std::memcpy(pointer, &query_base->value, sizeof(query_base->value)); | ||
| 272 | } else { | ||
| 273 | u32 value = static_cast<u32>(query_base->value); | ||
| 274 | std::memcpy(pointer, &value, sizeof(value)); | ||
| 275 | } | ||
| 276 | if (!is_synced) [[likely]] { | ||
| 277 | impl->pending_unregister.push_back(query_location); | ||
| 278 | } | ||
| 279 | }); | ||
| 280 | if (is_fence) { | ||
| 281 | impl->rasterizer.SignalFence(std::move(operation)); | ||
| 282 | } else { | ||
| 283 | impl->rasterizer.SyncOperation(std::move(operation)); | ||
| 284 | } | ||
| 285 | if (is_synced) { | ||
| 286 | streamer->Free(new_query_id); | ||
| 287 | return; | ||
| 288 | } | ||
| 289 | auto [cont_addr, base] = gen_caching_indexing(cpu_addr); | ||
| 290 | { | ||
| 291 | std::scoped_lock lock(cache_mutex); | ||
| 292 | auto it1 = cached_queries.try_emplace(cont_addr); | ||
| 293 | auto& sub_container = it1.first->second; | ||
| 294 | auto it_current = sub_container.find(base); | ||
| 295 | if (it_current == sub_container.end()) { | ||
| 296 | sub_container.insert_or_assign(base, query_location); | ||
| 297 | return; | ||
| 298 | } | ||
| 299 | auto* old_query = impl->ObtainQuery(it_current->second); | ||
| 300 | old_query->flags |= QueryFlagBits::IsRewritten; | ||
| 301 | sub_container.insert_or_assign(base, query_location); | ||
| 302 | } | ||
| 303 | } | ||
| 304 | |||
| 305 | template <typename Traits> | ||
| 306 | void QueryCacheBase<Traits>::UnregisterPending() { | ||
| 307 | const auto gen_caching_indexing = [](VAddr cur_addr) { | ||
| 308 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | ||
| 309 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | ||
| 310 | }; | ||
| 311 | std::scoped_lock lock(cache_mutex); | ||
| 312 | for (QueryLocation loc : impl->pending_unregister) { | ||
| 313 | const auto [streamer_id, query_id] = loc.unpack(); | ||
| 314 | auto* streamer = impl->streamers[streamer_id]; | ||
| 315 | if (!streamer) [[unlikely]] { | ||
| 316 | continue; | ||
| 317 | } | ||
| 318 | auto* query = streamer->GetQuery(query_id); | ||
| 319 | auto [cont_addr, base] = gen_caching_indexing(query->guest_address); | ||
| 320 | auto it1 = cached_queries.find(cont_addr); | ||
| 321 | if (it1 != cached_queries.end()) { | ||
| 322 | auto it2 = it1->second.find(base); | ||
| 323 | if (it2 != it1->second.end()) { | ||
| 324 | if (it2->second.raw == loc.raw) { | ||
| 325 | it1->second.erase(it2); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | } | ||
| 329 | streamer->Free(query_id); | ||
| 330 | } | ||
| 331 | impl->pending_unregister.clear(); | ||
| 332 | } | ||
| 333 | |||
| 334 | template <typename Traits> | ||
| 335 | void QueryCacheBase<Traits>::NotifyWFI() { | ||
| 336 | bool should_sync = false; | ||
| 337 | impl->ForEachStreamer( | ||
| 338 | [&should_sync](StreamerInterface* streamer) { should_sync |= streamer->HasPendingSync(); }); | ||
| 339 | if (!should_sync) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | |||
| 343 | impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->PresyncWrites(); }); | ||
| 344 | impl->runtime.Barriers(true); | ||
| 345 | impl->ForEachStreamer([](StreamerInterface* streamer) { streamer->SyncWrites(); }); | ||
| 346 | impl->runtime.Barriers(false); | ||
| 347 | } | ||
| 348 | |||
| 349 | template <typename Traits> | ||
| 350 | void QueryCacheBase<Traits>::NotifySegment(bool resume) { | ||
| 351 | if (resume) { | ||
| 352 | impl->runtime.ResumeHostConditionalRendering(); | ||
| 353 | } else { | ||
| 354 | impl->runtime.PauseHostConditionalRendering(); | ||
| 355 | CounterClose(VideoCommon::QueryType::ZPassPixelCount64); | ||
| 356 | CounterClose(VideoCommon::QueryType::StreamingByteCount); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | template <typename Traits> | ||
| 361 | bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { | ||
| 362 | bool qc_dirty = false; | ||
| 363 | const auto gen_lookup = [this, &qc_dirty](GPUVAddr address) -> VideoCommon::LookupData { | ||
| 364 | auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(address); | ||
| 365 | if (!cpu_addr_opt) [[unlikely]] { | ||
| 366 | return VideoCommon::LookupData{ | ||
| 367 | .address = 0, | ||
| 368 | .found_query = nullptr, | ||
| 369 | }; | ||
| 370 | } | ||
| 371 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 372 | std::scoped_lock lock(cache_mutex); | ||
| 373 | auto it1 = cached_queries.find(cpu_addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 374 | if (it1 == cached_queries.end()) { | ||
| 375 | return VideoCommon::LookupData{ | ||
| 376 | .address = cpu_addr, | ||
| 377 | .found_query = nullptr, | ||
| 378 | }; | ||
| 379 | } | ||
| 380 | auto& sub_container = it1->second; | ||
| 381 | auto it_current = sub_container.find(cpu_addr & Core::Memory::YUZU_PAGEMASK); | ||
| 382 | |||
| 383 | if (it_current == sub_container.end()) { | ||
| 384 | auto it_current_2 = sub_container.find((cpu_addr & Core::Memory::YUZU_PAGEMASK) + 4); | ||
| 385 | if (it_current_2 == sub_container.end()) { | ||
| 386 | return VideoCommon::LookupData{ | ||
| 387 | .address = cpu_addr, | ||
| 388 | .found_query = nullptr, | ||
| 389 | }; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | auto* query = impl->ObtainQuery(it_current->second); | ||
| 393 | qc_dirty |= True(query->flags & QueryFlagBits::IsHostManaged) && | ||
| 394 | False(query->flags & QueryFlagBits::IsGuestSynced); | ||
| 395 | return VideoCommon::LookupData{ | ||
| 396 | .address = cpu_addr, | ||
| 397 | .found_query = query, | ||
| 398 | }; | ||
| 399 | }; | ||
| 400 | |||
| 401 | auto& regs = maxwell3d->regs; | ||
| 402 | if (regs.render_enable_override != Maxwell::Regs::RenderEnable::Override::UseRenderEnable) { | ||
| 403 | impl->runtime.EndHostConditionalRendering(); | ||
| 404 | return false; | ||
| 405 | } | ||
| 406 | /*if (!Settings::IsGPULevelHigh()) { | ||
| 407 | impl->runtime.EndHostConditionalRendering(); | ||
| 408 | return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, | ||
| 409 | VideoCommon::CacheType::BufferCache | | ||
| 410 | VideoCommon::CacheType::QueryCache); | ||
| 411 | }*/ | ||
| 412 | const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); | ||
| 413 | const GPUVAddr address = regs.render_enable.Address(); | ||
| 414 | switch (mode) { | ||
| 415 | case ComparisonMode::True: | ||
| 416 | impl->runtime.EndHostConditionalRendering(); | ||
| 417 | return false; | ||
| 418 | case ComparisonMode::False: | ||
| 419 | impl->runtime.EndHostConditionalRendering(); | ||
| 420 | return false; | ||
| 421 | case ComparisonMode::Conditional: { | ||
| 422 | VideoCommon::LookupData object_1{gen_lookup(address)}; | ||
| 423 | return impl->runtime.HostConditionalRenderingCompareValue(object_1, qc_dirty); | ||
| 424 | } | ||
| 425 | case ComparisonMode::IfEqual: { | ||
| 426 | VideoCommon::LookupData object_1{gen_lookup(address)}; | ||
| 427 | VideoCommon::LookupData object_2{gen_lookup(address + 16)}; | ||
| 428 | return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, | ||
| 429 | true); | ||
| 430 | } | ||
| 431 | case ComparisonMode::IfNotEqual: { | ||
| 432 | VideoCommon::LookupData object_1{gen_lookup(address)}; | ||
| 433 | VideoCommon::LookupData object_2{gen_lookup(address + 16)}; | ||
| 434 | return impl->runtime.HostConditionalRenderingCompareValues(object_1, object_2, qc_dirty, | ||
| 435 | false); | ||
| 436 | } | ||
| 437 | default: | ||
| 438 | return false; | ||
| 439 | } | ||
| 440 | } | ||
| 441 | |||
| 442 | // Async downloads | ||
| 443 | template <typename Traits> | ||
| 444 | void QueryCacheBase<Traits>::CommitAsyncFlushes() { | ||
| 445 | u64 mask{}; | ||
| 446 | { | ||
| 447 | std::scoped_lock lk(impl->flush_guard); | ||
| 448 | impl->ForEachStreamer([&mask](StreamerInterface* streamer) { | ||
| 449 | bool local_result = streamer->HasUnsyncedQueries(); | ||
| 450 | if (local_result) { | ||
| 451 | mask |= 1ULL << streamer->GetId(); | ||
| 452 | } | ||
| 453 | }); | ||
| 454 | impl->flushes_pending.push_back(mask); | ||
| 455 | } | ||
| 456 | std::function<void()> func([this] { UnregisterPending(); }); | ||
| 457 | impl->rasterizer.SyncOperation(std::move(func)); | ||
| 458 | if (mask == 0) { | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | impl->ForEachStreamerIn(mask, | ||
| 462 | [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); | ||
| 463 | } | ||
| 464 | |||
| 465 | template <typename Traits> | ||
| 466 | bool QueryCacheBase<Traits>::HasUncommittedFlushes() const { | ||
| 467 | bool result = false; | ||
| 468 | impl->ForEachStreamer([&result](StreamerInterface* streamer) { | ||
| 469 | result |= streamer->HasUnsyncedQueries(); | ||
| 470 | return result; | ||
| 471 | }); | ||
| 472 | return result; | ||
| 473 | } | ||
| 474 | |||
| 475 | template <typename Traits> | ||
| 476 | bool QueryCacheBase<Traits>::ShouldWaitAsyncFlushes() { | ||
| 477 | std::scoped_lock lk(impl->flush_guard); | ||
| 478 | return !impl->flushes_pending.empty() && impl->flushes_pending.front() != 0ULL; | ||
| 479 | } | ||
| 480 | |||
| 481 | template <typename Traits> | ||
| 482 | void QueryCacheBase<Traits>::PopAsyncFlushes() { | ||
| 483 | u64 mask; | ||
| 484 | { | ||
| 485 | std::scoped_lock lk(impl->flush_guard); | ||
| 486 | mask = impl->flushes_pending.front(); | ||
| 487 | impl->flushes_pending.pop_front(); | ||
| 488 | } | ||
| 489 | if (mask == 0) { | ||
| 490 | return; | ||
| 491 | } | ||
| 492 | impl->ForEachStreamerIn(mask, | ||
| 493 | [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); }); | ||
| 494 | } | ||
| 495 | |||
| 496 | // Invalidation | ||
| 497 | |||
| 498 | template <typename Traits> | ||
| 499 | void QueryCacheBase<Traits>::InvalidateQuery(QueryCacheBase<Traits>::QueryLocation location) { | ||
| 500 | auto* query_base = impl->ObtainQuery(location); | ||
| 501 | if (!query_base) { | ||
| 502 | return; | ||
| 503 | } | ||
| 504 | query_base->flags |= QueryFlagBits::IsInvalidated; | ||
| 505 | } | ||
| 506 | |||
| 507 | template <typename Traits> | ||
| 508 | bool QueryCacheBase<Traits>::IsQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { | ||
| 509 | auto* query_base = impl->ObtainQuery(location); | ||
| 510 | if (!query_base) { | ||
| 511 | return false; | ||
| 512 | } | ||
| 513 | return True(query_base->flags & QueryFlagBits::IsHostManaged) && | ||
| 514 | False(query_base->flags & QueryFlagBits::IsGuestSynced); | ||
| 515 | } | ||
| 516 | |||
| 517 | template <typename Traits> | ||
| 518 | bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLocation location) { | ||
| 519 | auto* query_base = impl->ObtainQuery(location); | ||
| 520 | if (!query_base) { | ||
| 521 | return false; | ||
| 522 | } | ||
| 523 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | ||
| 524 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | ||
| 525 | auto* ptr = impl->cpu_memory.GetPointer(query_base->guest_address); | ||
| 526 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | ||
| 527 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | ||
| 528 | return false; | ||
| 529 | } | ||
| 530 | u32 value_l = static_cast<u32>(query_base->value); | ||
| 531 | std::memcpy(ptr, &value_l, sizeof(value_l)); | ||
| 532 | return false; | ||
| 533 | } | ||
| 534 | return True(query_base->flags & QueryFlagBits::IsHostManaged) && | ||
| 535 | False(query_base->flags & QueryFlagBits::IsGuestSynced); | ||
| 536 | } | ||
| 537 | |||
/// Asks the rasterizer to release its fences.
template <typename Traits>
void QueryCacheBase<Traits>::RequestGuestHostSync() {
    impl->rasterizer.ReleaseFences();
}
| 542 | |||
| 543 | } // namespace VideoCommon | ||
diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h new file mode 100644 index 000000000..55f508dd1 --- /dev/null +++ b/src/video_core/query_cache/query_cache_base.h | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <functional> | ||
| 7 | #include <mutex> | ||
| 8 | #include <optional> | ||
| 9 | #include <span> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/assert.h" | ||
| 14 | #include "common/bit_field.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | #include "video_core/control/channel_state_cache.h" | ||
| 18 | #include "video_core/query_cache/query_base.h" | ||
| 19 | #include "video_core/query_cache/types.h" | ||
| 20 | |||
// Forward declarations of the types QueryCacheBase's constructor receives by reference.
namespace Core::Memory {
class Memory;
} // namespace Core::Memory

namespace VideoCore {
class RasterizerInterface;
} // namespace VideoCore

namespace Tegra {
class GPU;
} // namespace Tegra
| 32 | |||
| 33 | namespace VideoCommon { | ||
| 34 | |||
| 35 | struct LookupData { | ||
| 36 | VAddr address; | ||
| 37 | QueryBase* found_query; | ||
| 38 | }; | ||
| 39 | |||
| 40 | template <typename Traits> | ||
| 41 | class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 42 | using RuntimeType = typename Traits::RuntimeType; | ||
| 43 | |||
| 44 | public: | ||
| 45 | union QueryLocation { | ||
| 46 | BitField<27, 5, u32> stream_id; | ||
| 47 | BitField<0, 27, u32> query_id; | ||
| 48 | u32 raw; | ||
| 49 | |||
| 50 | std::pair<size_t, size_t> unpack() { | ||
| 51 | return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | |||
| 55 | explicit QueryCacheBase(Tegra::GPU& gpu, VideoCore::RasterizerInterface& rasterizer_, | ||
| 56 | Core::Memory::Memory& cpu_memory_, RuntimeType& runtime_); | ||
| 57 | |||
| 58 | ~QueryCacheBase(); | ||
| 59 | |||
| 60 | void InvalidateRegion(VAddr addr, std::size_t size) { | ||
| 61 | IterateCache<true>(addr, size, | ||
| 62 | [this](QueryLocation location) { InvalidateQuery(location); }); | ||
| 63 | } | ||
| 64 | |||
| 65 | void FlushRegion(VAddr addr, std::size_t size) { | ||
| 66 | bool result = false; | ||
| 67 | IterateCache<false>(addr, size, [this, &result](QueryLocation location) { | ||
| 68 | result |= SemiFlushQueryDirty(location); | ||
| 69 | return result; | ||
| 70 | }); | ||
| 71 | if (result) { | ||
| 72 | RequestGuestHostSync(); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | static u64 BuildMask(std::span<QueryType> types) { | ||
| 77 | u64 mask = 0; | ||
| 78 | for (auto query_type : types) { | ||
| 79 | mask |= 1ULL << (static_cast<u64>(query_type)); | ||
| 80 | } | ||
| 81 | return mask; | ||
| 82 | } | ||
| 83 | |||
| 84 | /// Return true when a CPU region is modified from the GPU | ||
| 85 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 86 | bool result = false; | ||
| 87 | IterateCache<false>(addr, size, [this, &result](QueryLocation location) { | ||
| 88 | result |= IsQueryDirty(location); | ||
| 89 | return result; | ||
| 90 | }); | ||
| 91 | return result; | ||
| 92 | } | ||
| 93 | |||
| 94 | void CounterEnable(QueryType counter_type, bool is_enabled); | ||
| 95 | |||
| 96 | void CounterReset(QueryType counter_type); | ||
| 97 | |||
| 98 | void CounterClose(QueryType counter_type); | ||
| 99 | |||
| 100 | void CounterReport(GPUVAddr addr, QueryType counter_type, QueryPropertiesFlags flags, | ||
| 101 | u32 payload, u32 subreport); | ||
| 102 | |||
| 103 | void NotifyWFI(); | ||
| 104 | |||
| 105 | bool AccelerateHostConditionalRendering(); | ||
| 106 | |||
| 107 | // Async downloads | ||
| 108 | void CommitAsyncFlushes(); | ||
| 109 | |||
| 110 | bool HasUncommittedFlushes() const; | ||
| 111 | |||
| 112 | bool ShouldWaitAsyncFlushes(); | ||
| 113 | |||
| 114 | void PopAsyncFlushes(); | ||
| 115 | |||
| 116 | void NotifySegment(bool resume); | ||
| 117 | |||
| 118 | void BindToChannel(s32 id) override; | ||
| 119 | |||
| 120 | protected: | ||
| 121 | template <bool remove_from_cache, typename Func> | ||
| 122 | void IterateCache(VAddr addr, std::size_t size, Func&& func) { | ||
| 123 | static constexpr bool RETURNS_BOOL = | ||
| 124 | std::is_same_v<std::invoke_result<Func, QueryLocation>, bool>; | ||
| 125 | const u64 addr_begin = addr; | ||
| 126 | const u64 addr_end = addr_begin + size; | ||
| 127 | |||
| 128 | const u64 page_end = addr_end >> Core::Memory::YUZU_PAGEBITS; | ||
| 129 | std::scoped_lock lock(cache_mutex); | ||
| 130 | for (u64 page = addr_begin >> Core::Memory::YUZU_PAGEBITS; page <= page_end; ++page) { | ||
| 131 | const u64 page_start = page << Core::Memory::YUZU_PAGEBITS; | ||
| 132 | const auto in_range = [page_start, addr_begin, addr_end](const u32 query_location) { | ||
| 133 | const u64 cache_begin = page_start + query_location; | ||
| 134 | const u64 cache_end = cache_begin + sizeof(u32); | ||
| 135 | return cache_begin < addr_end && addr_begin < cache_end; | ||
| 136 | }; | ||
| 137 | const auto& it = cached_queries.find(page); | ||
| 138 | if (it == std::end(cached_queries)) { | ||
| 139 | continue; | ||
| 140 | } | ||
| 141 | auto& contents = it->second; | ||
| 142 | for (auto& query : contents) { | ||
| 143 | if (!in_range(query.first)) { | ||
| 144 | continue; | ||
| 145 | } | ||
| 146 | if constexpr (RETURNS_BOOL) { | ||
| 147 | if (func(query.second)) { | ||
| 148 | return; | ||
| 149 | } | ||
| 150 | } else { | ||
| 151 | func(query.second); | ||
| 152 | } | ||
| 153 | } | ||
| 154 | if constexpr (remove_from_cache) { | ||
| 155 | const auto in_range2 = [&](const std::pair<u32, QueryLocation>& pair) { | ||
| 156 | return in_range(pair.first); | ||
| 157 | }; | ||
| 158 | std::erase_if(contents, in_range2); | ||
| 159 | } | ||
| 160 | } | ||
| 161 | } | ||
| 162 | |||
| 163 | using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; | ||
| 164 | |||
| 165 | void InvalidateQuery(QueryLocation location); | ||
| 166 | bool IsQueryDirty(QueryLocation location); | ||
| 167 | bool SemiFlushQueryDirty(QueryLocation location); | ||
| 168 | void RequestGuestHostSync(); | ||
| 169 | void UnregisterPending(); | ||
| 170 | |||
| 171 | std::unordered_map<u64, std::unordered_map<u32, QueryLocation>> cached_queries; | ||
| 172 | std::mutex cache_mutex; | ||
| 173 | |||
| 174 | struct QueryCacheBaseImpl; | ||
| 175 | friend struct QueryCacheBaseImpl; | ||
| 176 | friend RuntimeType; | ||
| 177 | |||
| 178 | std::unique_ptr<struct QueryCacheBaseImpl> impl; | ||
| 179 | }; | ||
| 180 | |||
| 181 | } // namespace VideoCommon \ No newline at end of file | ||
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h new file mode 100644 index 000000000..dd5f95b3c --- /dev/null +++ b/src/video_core/query_cache/query_stream.h | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
#include <deque>
#include <memory>
#include <mutex>
#include <new>
#include <optional>
#include <utility>
#include <vector>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/query_cache/bank_base.h"
#include "video_core/query_cache/query_base.h"
| 14 | |||
| 15 | namespace VideoCommon { | ||
| 16 | |||
| 17 | class StreamerInterface { | ||
| 18 | public: | ||
| 19 | StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} | ||
| 20 | virtual ~StreamerInterface() = default; | ||
| 21 | |||
| 22 | virtual QueryBase* GetQuery(size_t id) = 0; | ||
| 23 | |||
| 24 | virtual void StartCounter() { | ||
| 25 | /* Do Nothing */ | ||
| 26 | } | ||
| 27 | |||
| 28 | virtual void PauseCounter() { | ||
| 29 | /* Do Nothing */ | ||
| 30 | } | ||
| 31 | |||
| 32 | virtual void ResetCounter() { | ||
| 33 | /* Do Nothing */ | ||
| 34 | } | ||
| 35 | |||
| 36 | virtual void CloseCounter() { | ||
| 37 | /* Do Nothing */ | ||
| 38 | } | ||
| 39 | |||
| 40 | virtual bool HasPendingSync() { | ||
| 41 | return false; | ||
| 42 | } | ||
| 43 | |||
| 44 | virtual void PresyncWrites() { | ||
| 45 | /* Do Nothing */ | ||
| 46 | } | ||
| 47 | |||
| 48 | virtual void SyncWrites() { | ||
| 49 | /* Do Nothing */ | ||
| 50 | } | ||
| 51 | |||
| 52 | virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, | ||
| 53 | std::optional<u32> subreport = std::nullopt) = 0; | ||
| 54 | |||
| 55 | virtual bool HasUnsyncedQueries() { | ||
| 56 | return false; | ||
| 57 | } | ||
| 58 | |||
| 59 | virtual void PushUnsyncedQueries() { | ||
| 60 | /* Do Nothing */ | ||
| 61 | } | ||
| 62 | |||
| 63 | virtual void PopUnsyncedQueries() { | ||
| 64 | /* Do Nothing */ | ||
| 65 | } | ||
| 66 | |||
| 67 | virtual void Free(size_t query_id) = 0; | ||
| 68 | |||
| 69 | size_t GetId() const { | ||
| 70 | return id; | ||
| 71 | } | ||
| 72 | |||
| 73 | protected: | ||
| 74 | const size_t id; | ||
| 75 | const u64 dependance_mask; | ||
| 76 | }; | ||
| 77 | |||
| 78 | template <typename QueryType> | ||
| 79 | class SimpleStreamer : public StreamerInterface { | ||
| 80 | public: | ||
| 81 | SimpleStreamer(size_t id_) : StreamerInterface{id_} {} | ||
| 82 | virtual ~SimpleStreamer() = default; | ||
| 83 | |||
| 84 | protected: | ||
| 85 | virtual QueryType* GetQuery(size_t query_id) override { | ||
| 86 | if (query_id < slot_queries.size()) { | ||
| 87 | return &slot_queries[query_id]; | ||
| 88 | } | ||
| 89 | return nullptr; | ||
| 90 | } | ||
| 91 | |||
| 92 | virtual void Free(size_t query_id) override { | ||
| 93 | std::scoped_lock lk(guard); | ||
| 94 | ReleaseQuery(query_id); | ||
| 95 | } | ||
| 96 | |||
| 97 | template <typename... Args, typename = decltype(QueryType(std::declval<Args>()...))> | ||
| 98 | size_t BuildQuery(Args&&... args) { | ||
| 99 | std::scoped_lock lk(guard); | ||
| 100 | if (!old_queries.empty()) { | ||
| 101 | size_t new_id = old_queries.front(); | ||
| 102 | old_queries.pop_front(); | ||
| 103 | new (&slot_queries[new_id]) QueryType(std::forward<Args>(args)...); | ||
| 104 | return new_id; | ||
| 105 | } | ||
| 106 | size_t new_id = slot_queries.size(); | ||
| 107 | slot_queries.emplace_back(std::forward<Args>(args)...); | ||
| 108 | return new_id; | ||
| 109 | } | ||
| 110 | |||
| 111 | void ReleaseQuery(size_t query_id) { | ||
| 112 | |||
| 113 | if (query_id < slot_queries.size()) { | ||
| 114 | old_queries.push_back(query_id); | ||
| 115 | return; | ||
| 116 | } | ||
| 117 | UNREACHABLE(); | ||
| 118 | } | ||
| 119 | |||
| 120 | std::mutex guard; | ||
| 121 | std::deque<QueryType> slot_queries; | ||
| 122 | std::deque<size_t> old_queries; | ||
| 123 | }; | ||
| 124 | |||
| 125 | } // namespace VideoCommon \ No newline at end of file | ||
diff --git a/src/video_core/query_cache/types.h b/src/video_core/query_cache/types.h new file mode 100644 index 000000000..e9226bbfc --- /dev/null +++ b/src/video_core/query_cache/types.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/common_funcs.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | enum class QueryPropertiesFlags : u32 { | ||
| 12 | HasTimeout = 1 << 0, | ||
| 13 | IsAFence = 1 << 1, | ||
| 14 | }; | ||
| 15 | DECLARE_ENUM_FLAG_OPERATORS(QueryPropertiesFlags) | ||
| 16 | |||
| 17 | // This should always be equivalent to maxwell3d Report Semaphore Reports | ||
| 18 | enum class QueryType : u32 { | ||
| 19 | Payload = 0, // "None" in docs, but confirmed via hardware to return the payload | ||
| 20 | VerticesGenerated = 1, | ||
| 21 | ZPassPixelCount = 2, | ||
| 22 | PrimitivesGenerated = 3, | ||
| 23 | AlphaBetaClocks = 4, | ||
| 24 | VertexShaderInvocations = 5, | ||
| 25 | StreamingPrimitivesNeededMinusSucceeded = 6, | ||
| 26 | GeometryShaderInvocations = 7, | ||
| 27 | GeometryShaderPrimitivesGenerated = 9, | ||
| 28 | ZCullStats0 = 10, | ||
| 29 | StreamingPrimitivesSucceeded = 11, | ||
| 30 | ZCullStats1 = 12, | ||
| 31 | StreamingPrimitivesNeeded = 13, | ||
| 32 | ZCullStats2 = 14, | ||
| 33 | ClipperInvocations = 15, | ||
| 34 | ZCullStats3 = 16, | ||
| 35 | ClipperPrimitivesGenerated = 17, | ||
| 36 | VtgPrimitivesOut = 18, | ||
| 37 | PixelShaderInvocations = 19, | ||
| 38 | ZPassPixelCount64 = 21, | ||
| 39 | IEEECleanColorTarget = 24, | ||
| 40 | IEEECleanZetaTarget = 25, | ||
| 41 | StreamingByteCount = 26, | ||
| 42 | TessellationInitInvocations = 27, | ||
| 43 | BoundingRectangle = 28, | ||
| 44 | TessellationShaderInvocations = 29, | ||
| 45 | TotalStreamingPrimitivesNeededMinusSucceeded = 30, | ||
| 46 | TessellationShaderPrimitivesGenerated = 31, | ||
| 47 | // max. | ||
| 48 | MaxQueryTypes, | ||
| 49 | }; | ||
| 50 | |||
| 51 | // Comparison modes for Host Conditional Rendering | ||
| 52 | enum class ComparisonMode : u32 { | ||
| 53 | False = 0, | ||
| 54 | True = 1, | ||
| 55 | Conditional = 2, | ||
| 56 | IfEqual = 3, | ||
| 57 | IfNotEqual = 4, | ||
| 58 | MaxComparisonMode, | ||
| 59 | }; | ||
| 60 | |||
| 61 | // Reduction ops. | ||
| 62 | enum class ReductionOp : u32 { | ||
| 63 | RedAdd = 0, | ||
| 64 | RedMin = 1, | ||
| 65 | RedMax = 2, | ||
| 66 | RedInc = 3, | ||
| 67 | RedDec = 4, | ||
| 68 | RedAnd = 5, | ||
| 69 | RedOr = 6, | ||
| 70 | RedXor = 7, | ||
| 71 | MaxReductionOp, | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace VideoCommon \ No newline at end of file | ||