Diffstat (limited to 'src')
25 files changed, 2916 insertions, 1748 deletions
diff --git a/src/common/intrusive_list.h b/src/common/intrusive_list.h
new file mode 100644
index 000000000..d330dc1c2
--- /dev/null
+++ b/src/common/intrusive_list.h
@@ -0,0 +1,631 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/parent_of_member.h"
+
+namespace Common {
+
+// Forward declare implementation class for Node.
+namespace impl {
+
+class IntrusiveListImpl;
+
+}
+
+class IntrusiveListNode {
+    YUZU_NON_COPYABLE(IntrusiveListNode);
+
+private:
+    friend class impl::IntrusiveListImpl;
+
+    IntrusiveListNode* m_prev;
+    IntrusiveListNode* m_next;
+
+public:
+    constexpr IntrusiveListNode() : m_prev(this), m_next(this) {}
+
+    constexpr bool IsLinked() const {
+        return m_next != this;
+    }
+
+private:
+    constexpr void LinkPrev(IntrusiveListNode* node) {
+        // We can't link an already linked node.
+        ASSERT(!node->IsLinked());
+        this->SplicePrev(node, node);
+    }
+
+    constexpr void SplicePrev(IntrusiveListNode* first, IntrusiveListNode* last) {
+        // Splice a range into the list.
+        auto last_prev = last->m_prev;
+        first->m_prev = m_prev;
+        last_prev->m_next = this;
+        m_prev->m_next = first;
+        m_prev = last_prev;
+    }
+
+    constexpr void LinkNext(IntrusiveListNode* node) {
+        // We can't link an already linked node.
+        ASSERT(!node->IsLinked());
+        return this->SpliceNext(node, node);
+    }
+
+    constexpr void SpliceNext(IntrusiveListNode* first, IntrusiveListNode* last) {
+        // Splice a range into the list.
+        auto last_prev = last->m_prev;
+        first->m_prev = this;
+        last_prev->m_next = m_next;
+        m_next->m_prev = last_prev;
+        m_next = first;
+    }
+
+    constexpr void Unlink() {
+        this->Unlink(m_next);
+    }
+
+    constexpr void Unlink(IntrusiveListNode* last) {
+        // Unlink a node from a next node.
+        auto last_prev = last->m_prev;
+        m_prev->m_next = last;
+        last->m_prev = m_prev;
+        last_prev->m_next = this;
+        m_prev = last_prev;
+    }
+
+    constexpr IntrusiveListNode* GetPrev() {
+        return m_prev;
+    }
+
+    constexpr const IntrusiveListNode* GetPrev() const {
+        return m_prev;
+    }
+
+    constexpr IntrusiveListNode* GetNext() {
+        return m_next;
+    }
+
+    constexpr const IntrusiveListNode* GetNext() const {
+        return m_next;
+    }
+};
+// DEPRECATED: static_assert(std::is_literal_type<IntrusiveListNode>::value);
+
+namespace impl {
+
+class IntrusiveListImpl {
+    YUZU_NON_COPYABLE(IntrusiveListImpl);
+
+private:
+    IntrusiveListNode m_root_node;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = IntrusiveListNode;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveListImpl::value_type;
+        using difference_type = typename IntrusiveListImpl::difference_type;
+        using pointer =
+            std::conditional_t<Const, IntrusiveListImpl::const_pointer, IntrusiveListImpl::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveListImpl::const_reference,
+                                             IntrusiveListImpl::reference>;
+
+    private:
+        pointer m_node;
+
+    public:
+        constexpr explicit Iterator(pointer n) : m_node(n) {}
+
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_node == rhs.m_node;
+        }
+
+        constexpr pointer operator->() const {
+            return m_node;
+        }
+
+        constexpr reference operator*() const {
+            return *m_node;
+        }
+
+        constexpr Iterator& operator++() {
+            m_node = m_node->m_next;
+            return *this;
+        }
+
+        constexpr Iterator& operator--() {
+            m_node = m_node->m_prev;
+            return *this;
+        }
+
+        constexpr Iterator operator++(int) {
+            const Iterator it{*this};
+            ++(*this);
+            return it;
+        }
+
+        constexpr Iterator operator--(int) {
+            const Iterator it{*this};
+            --(*this);
+            return it;
+        }
+
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_node);
+        }
+
+        constexpr Iterator<false> GetNonConstIterator() const {
+            return Iterator<false>(const_cast<IntrusiveListImpl::pointer>(m_node));
+        }
+    };
+
+public:
+    constexpr IntrusiveListImpl() : m_root_node() {}
+
+    // Iterator accessors.
+    constexpr iterator begin() {
+        return iterator(m_root_node.GetNext());
+    }
+
+    constexpr const_iterator begin() const {
+        return const_iterator(m_root_node.GetNext());
+    }
+
+    constexpr iterator end() {
+        return iterator(std::addressof(m_root_node));
+    }
+
+    constexpr const_iterator end() const {
+        return const_iterator(std::addressof(m_root_node));
+    }
+
+    constexpr iterator iterator_to(reference v) {
+        // Only allow iterator_to for values in lists.
+        ASSERT(v.IsLinked());
+        return iterator(std::addressof(v));
+    }
+
+    constexpr const_iterator iterator_to(const_reference v) const {
+        // Only allow iterator_to for values in lists.
+        ASSERT(v.IsLinked());
+        return const_iterator(std::addressof(v));
+    }
+
+    // Content management.
+    constexpr bool empty() const {
+        return !m_root_node.IsLinked();
+    }
+
+    constexpr size_type size() const {
+        return static_cast<size_type>(std::distance(this->begin(), this->end()));
+    }
+
+    constexpr reference back() {
+        return *m_root_node.GetPrev();
+    }
+
+    constexpr const_reference back() const {
+        return *m_root_node.GetPrev();
+    }
+
+    constexpr reference front() {
+        return *m_root_node.GetNext();
+    }
+
+    constexpr const_reference front() const {
+        return *m_root_node.GetNext();
+    }
+
+    constexpr void push_back(reference node) {
+        m_root_node.LinkPrev(std::addressof(node));
+    }
+
+    constexpr void push_front(reference node) {
+        m_root_node.LinkNext(std::addressof(node));
+    }
+
+    constexpr void pop_back() {
+        m_root_node.GetPrev()->Unlink();
+    }
+
+    constexpr void pop_front() {
+        m_root_node.GetNext()->Unlink();
+    }
+
+    constexpr iterator insert(const_iterator pos, reference node) {
+        pos.GetNonConstIterator()->LinkPrev(std::addressof(node));
+        return iterator(std::addressof(node));
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o) {
+        splice_impl(pos, o.begin(), o.end());
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first) {
+        const_iterator last(first);
+        std::advance(last, 1);
+        splice_impl(pos, first, last);
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first,
+                          const_iterator last) {
+        splice_impl(pos, first, last);
+    }
+
+    constexpr iterator erase(const_iterator pos) {
+        if (pos == this->end()) {
+            return this->end();
+        }
+        iterator it(pos.GetNonConstIterator());
+        (it++)->Unlink();
+        return it;
+    }
+
+    constexpr void clear() {
+        while (!this->empty()) {
+            this->pop_front();
+        }
+    }
+
+private:
+    constexpr void splice_impl(const_iterator _pos, const_iterator _first, const_iterator _last) {
+        if (_first == _last) {
+            return;
+        }
+        iterator pos(_pos.GetNonConstIterator());
+        iterator first(_first.GetNonConstIterator());
+        iterator last(_last.GetNonConstIterator());
+        first->Unlink(std::addressof(*last));
+        pos->SplicePrev(std::addressof(*first), std::addressof(*first));
+    }
+};
+
+} // namespace impl
+
+template <class T, class Traits>
+class IntrusiveList {
+    YUZU_NON_COPYABLE(IntrusiveList);
+
+private:
+    impl::IntrusiveListImpl m_impl;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = T;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        friend class Common::IntrusiveList<T, Traits>;
+
+        using ImplIterator =
+            std::conditional_t<Const, Common::impl::IntrusiveListImpl::const_iterator,
+                               Common::impl::IntrusiveListImpl::iterator>;
+
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveList::value_type;
+        using difference_type = typename IntrusiveList::difference_type;
+        using pointer =
+            std::conditional_t<Const, IntrusiveList::const_pointer, IntrusiveList::pointer>;
+        using reference =
+            std::conditional_t<Const, IntrusiveList::const_reference, IntrusiveList::reference>;
+
+    private:
+        ImplIterator m_iterator;
+
+    private:
+        constexpr explicit Iterator(ImplIterator it) : m_iterator(it) {}
+
+        constexpr ImplIterator GetImplIterator() const {
+            return m_iterator;
+        }
+
+    public:
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_iterator == rhs.m_iterator;
+        }
+
+        constexpr pointer operator->() const {
+            return std::addressof(Traits::GetParent(*m_iterator));
+        }
+
+        constexpr reference operator*() const {
+            return Traits::GetParent(*m_iterator);
+        }
+
+        constexpr Iterator& operator++() {
+            ++m_iterator;
+            return *this;
+        }
+
+        constexpr Iterator& operator--() {
+            --m_iterator;
+            return *this;
+        }
+
+        constexpr Iterator operator++(int) {
+            const Iterator it{*this};
+            ++m_iterator;
+            return it;
+        }
+
+        constexpr Iterator operator--(int) {
+            const Iterator it{*this};
+            --m_iterator;
+            return it;
+        }
+
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_iterator);
+        }
+    };
+
+private:
+    static constexpr IntrusiveListNode& GetNode(reference ref) {
+        return Traits::GetNode(ref);
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(const_reference ref) {
+        return Traits::GetNode(ref);
+    }
+
+    static constexpr reference GetParent(IntrusiveListNode& node) {
+        return Traits::GetParent(node);
+    }
+
+    static constexpr const_reference GetParent(IntrusiveListNode const& node) {
+        return Traits::GetParent(node);
+    }
+
+public:
+    constexpr IntrusiveList() : m_impl() {}
+
+    // Iterator accessors.
+    constexpr iterator begin() {
+        return iterator(m_impl.begin());
+    }
+
+    constexpr const_iterator begin() const {
+        return const_iterator(m_impl.begin());
+    }
+
+    constexpr iterator end() {
+        return iterator(m_impl.end());
+    }
+
+    constexpr const_iterator end() const {
+        return const_iterator(m_impl.end());
+    }
+
+    constexpr const_iterator cbegin() const {
+        return this->begin();
+    }
+
+    constexpr const_iterator cend() const {
+        return this->end();
+    }
+
+    constexpr reverse_iterator rbegin() {
+        return reverse_iterator(this->end());
+    }
+
+    constexpr const_reverse_iterator rbegin() const {
+        return const_reverse_iterator(this->end());
+    }
+
+    constexpr reverse_iterator rend() {
+        return reverse_iterator(this->begin());
+    }
+
+    constexpr const_reverse_iterator rend() const {
+        return const_reverse_iterator(this->begin());
+    }
+
+    constexpr const_reverse_iterator crbegin() const {
+        return this->rbegin();
+    }
+
+    constexpr const_reverse_iterator crend() const {
+        return this->rend();
+    }
+
+    constexpr iterator iterator_to(reference v) {
+        return iterator(m_impl.iterator_to(GetNode(v)));
+    }
+
+    constexpr const_iterator iterator_to(const_reference v) const {
+        return const_iterator(m_impl.iterator_to(GetNode(v)));
+    }
+
+    // Content management.
+    constexpr bool empty() const {
+        return m_impl.empty();
+    }
+
+    constexpr size_type size() const {
+        return m_impl.size();
+    }
+
+    constexpr reference back() {
+        return GetParent(m_impl.back());
+    }
+
+    constexpr const_reference back() const {
+        return GetParent(m_impl.back());
+    }
+
+    constexpr reference front() {
+        return GetParent(m_impl.front());
+    }
+
+    constexpr const_reference front() const {
+        return GetParent(m_impl.front());
+    }
+
+    constexpr void push_back(reference ref) {
+        m_impl.push_back(GetNode(ref));
+    }
+
+    constexpr void push_front(reference ref) {
+        m_impl.push_front(GetNode(ref));
+    }
+
+    constexpr void pop_back() {
+        m_impl.pop_back();
+    }
+
+    constexpr void pop_front() {
+        m_impl.pop_front();
+    }
+
+    constexpr iterator insert(const_iterator pos, reference ref) {
+        return iterator(m_impl.insert(pos.GetImplIterator(), GetNode(ref)));
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl);
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator());
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first,
+                          const_iterator last) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator(),
+                      last.GetImplIterator());
+    }
+
+    constexpr iterator erase(const_iterator pos) {
+        return iterator(m_impl.erase(pos.GetImplIterator()));
+    }
+
+    constexpr void clear() {
+        m_impl.clear();
+    }
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraits;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraits<Member, Derived> {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return parent.*Member;
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return parent.*Member;
+    }
+
+    static Derived& GetParent(IntrusiveListNode& node) {
+        return Common::GetParentReference<Member, Derived>(std::addressof(node));
+    }
+
+    static Derived const& GetParent(IntrusiveListNode const& node) {
+        return Common::GetParentReference<Member, Derived>(std::addressof(node));
+    }
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf<Member, Derived> {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return parent.*Member;
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return parent.*Member;
+    }
+
+    static Derived& GetParent(IntrusiveListNode& node) {
+        return *reinterpret_cast<Derived*>(reinterpret_cast<char*>(std::addressof(node)) -
+                                           GetOffset());
+    }
+
+    static Derived const& GetParent(IntrusiveListNode const& node) {
+        return *reinterpret_cast<const Derived*>(
+            reinterpret_cast<const char*>(std::addressof(node)) - GetOffset());
+    }
+
+    static uintptr_t GetOffset() {
+        return reinterpret_cast<uintptr_t>(std::addressof(reinterpret_cast<Derived*>(0)->*Member));
+    }
+};
+
+template <class Derived>
+class IntrusiveListBaseNode : public IntrusiveListNode {};
+
+template <class Derived>
+class IntrusiveListBaseTraits {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return static_cast<IntrusiveListNode&>(
+            static_cast<IntrusiveListBaseNode<Derived>&>(parent));
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return static_cast<const IntrusiveListNode&>(
+            static_cast<const IntrusiveListBaseNode<Derived>&>(parent));
+    }
+
+    static constexpr Derived& GetParent(IntrusiveListNode& node) {
+        return static_cast<Derived&>(static_cast<IntrusiveListBaseNode<Derived>&>(node));
+    }
+
+    static constexpr Derived const& GetParent(IntrusiveListNode const& node) {
+        return static_cast<const Derived&>(
+            static_cast<const IntrusiveListBaseNode<Derived>&>(node));
+    }
+};
+
+} // namespace Common
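
For orientation, a minimal usage sketch of the new container follows (illustrative only; the Waiter type is hypothetical and not part of this commit). A client type inherits its link node from Common::IntrusiveListBaseNode, and the list threads elements through that embedded node, so push_back and erase never allocate. Note one behavioral difference from boost's default (safe-mode) hooks: IntrusiveListNode has no destructor logic and does not unlink itself, so elements must be removed from any list before they are destroyed.

#include "common/intrusive_list.h"

namespace {

// Hypothetical element type; the link node is inherited, not allocated.
class Waiter : public Common::IntrusiveListBaseNode<Waiter> {
public:
    explicit Waiter(int id) : m_id(id) {}

    int GetId() const {
        return m_id;
    }

private:
    int m_id;
};

using WaiterList = Common::IntrusiveListBaseTraits<Waiter>::ListType;

int Example() {
    Waiter a{1};
    Waiter b{2};

    WaiterList list;
    list.push_back(a); // links a's embedded node; no heap allocation
    list.push_back(b);

    int sum = 0;
    for (const Waiter& w : list) {
        sum += w.GetId(); // iterates in insertion order: 1, then 2
    }

    list.erase(list.iterator_to(b)); // O(1) removal through the embedded node
    list.clear(); // unlink remaining nodes before the Waiters are destroyed
    return sum;
}

} // namespace
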
diff --git a/src/core/hle/kernel/k_event_info.h b/src/core/hle/kernel/k_event_info.h
index 25b3ff594..eacfa5dc6 100644
--- a/src/core/hle/kernel/k_event_info.h
+++ b/src/core/hle/kernel/k_event_info.h
@@ -5,14 +5,15 @@
 
 #include <array>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/slab_helpers.h"
 #include "core/hle/kernel/svc_types.h"
 
 namespace Kernel {
 
-class KEventInfo : public KSlabAllocated<KEventInfo>, public boost::intrusive::list_base_hook<> {
+class KEventInfo : public KSlabAllocated<KEventInfo>,
+                   public Common::IntrusiveListBaseNode<KEventInfo> {
 public:
     struct InfoCreateThread {
         u32 thread_id{};
diff --git a/src/core/hle/kernel/k_object_name.h b/src/core/hle/kernel/k_object_name.h
index 2d97fc777..a8876fe37 100644
--- a/src/core/hle/kernel/k_object_name.h
+++ b/src/core/hle/kernel/k_object_name.h
@@ -5,7 +5,8 @@
 
 #include <array>
 #include <memory>
-#include <boost/intrusive/list.hpp>
+
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/slab_helpers.h"
@@ -15,13 +16,14 @@ namespace Kernel {
 
 class KObjectNameGlobalData;
 
-class KObjectName : public KSlabAllocated<KObjectName>, public boost::intrusive::list_base_hook<> {
+class KObjectName : public KSlabAllocated<KObjectName>,
+                    public Common::IntrusiveListBaseNode<KObjectName> {
 public:
     explicit KObjectName(KernelCore&) {}
     virtual ~KObjectName() = default;
 
     static constexpr size_t NameLengthMax = 12;
-    using List = boost::intrusive::list<KObjectName>;
+    using List = Common::IntrusiveListBaseTraits<KObjectName>::ListType;
 
     static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name);
     static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name);
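
KObjectName adopts the base-node form shown above. For types that cannot, or prefer not to, add another base class, the new header also provides a member-node form through IntrusiveListMemberTraits. A minimal, self-contained sketch (the Item type is hypothetical, not from this commit):

#include "common/intrusive_list.h"

namespace {

// Hypothetical type using an embedded member node instead of a base class.
struct Item {
    int value{};
    Common::IntrusiveListNode node;
};

// The traits locate the node through the member pointer and recover the
// owning Item from a node's address when iterating.
using ItemList = Common::IntrusiveListMemberTraits<&Item::node>::ListType;

int Example() {
    Item a{.value = 1};
    Item b{.value = 2};

    ItemList list;
    list.push_back(a);
    list.push_back(b);

    int sum = 0;
    for (const Item& item : list) {
        sum += item.value; // iteration yields Item&, not raw node references
    }

    list.clear(); // unlink before a and b go out of scope
    return sum;
}

} // namespace
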
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 21c040e62..625280290 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -7,7 +7,7 @@
 #include <string>
 #include <utility>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_server_session.h"
 #include "core/hle/kernel/k_synchronization_object.h"
@@ -42,7 +42,7 @@ public:
     bool IsSignaled() const override;
 
 private:
-    using SessionList = boost::intrusive::list<KServerSession>;
+    using SessionList = Common::IntrusiveListBaseTraits<KServerSession>::ListType;
 
     void CleanupSessions();
 
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h
index 5ee02f556..403891919 100644
--- a/src/core/hle/kernel/k_server_session.h
+++ b/src/core/hle/kernel/k_server_session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/k_session_request.h"
@@ -27,7 +27,7 @@ class KSession;
 class KThread;
 
 class KServerSession final : public KSynchronizationObject,
-                             public boost::intrusive::list_base_hook<> {
+                             public Common::IntrusiveListBaseNode<KServerSession> {
     KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject);
 
     friend class ServiceThread;
@@ -67,7 +67,8 @@ private:
     KSession* m_parent{};
 
     /// List of threads which are pending a reply.
-    boost::intrusive::list<KSessionRequest> m_request_list{};
+    using RequestList = Common::IntrusiveListBaseTraits<KSessionRequest>::ListType;
+    RequestList m_request_list{};
     KSessionRequest* m_current_request{};
 
     KLightLock m_lock;
diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h
index b5f04907b..283669e0a 100644
--- a/src/core/hle/kernel/k_session_request.h
+++ b/src/core/hle/kernel/k_session_request.h
@@ -5,6 +5,8 @@
 
 #include <array>
 
+#include "common/intrusive_list.h"
+
 #include "core/hle/kernel/k_auto_object.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_memory_block.h"
@@ -16,7 +18,7 @@ namespace Kernel {
 
 class KSessionRequest final : public KSlabAllocated<KSessionRequest>,
                               public KAutoObject,
-                              public boost::intrusive::list_base_hook<> {
+                              public Common::IntrusiveListBaseNode<KSessionRequest> {
     KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject);
 
 public:
diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h
index 75b73ba39..2d8ff20d6 100644
--- a/src/core/hle/kernel/k_shared_memory_info.h
+++ b/src/core/hle/kernel/k_shared_memory_info.h
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/slab_helpers.h"
 
@@ -12,7 +12,7 @@ namespace Kernel {
 class KSharedMemory;
 
 class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>,
-                                public boost::intrusive::list_base_hook<> {
+                                public Common::IntrusiveListBaseNode<KSharedMemoryInfo> {
 
 public:
     explicit KSharedMemoryInfo(KernelCore&) {}
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index 9c1a41128..f9814ac8f 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -12,7 +12,7 @@
 #include <utility>
 #include <vector>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "common/intrusive_red_black_tree.h"
 #include "common/spin_lock.h"
@@ -119,7 +119,7 @@ s32 GetCurrentCoreId(KernelCore& kernel);
 Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel);
 
 class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>,
-                      public boost::intrusive::list_base_hook<>,
+                      public Common::IntrusiveListBaseNode<KThread>,
                       public KTimerTask {
     KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject);
 
@@ -138,7 +138,7 @@ public:
 public:
     using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
    using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
-    using WaiterList = boost::intrusive::list<KThread>;
+    using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType;
 
     /**
      * Gets the thread's current priority
@@ -750,8 +750,9 @@
         ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>;
 
 public:
-    class LockWithPriorityInheritanceInfo : public KSlabAllocated<LockWithPriorityInheritanceInfo>,
-                                            public boost::intrusive::list_base_hook<> {
+    class LockWithPriorityInheritanceInfo
+        : public KSlabAllocated<LockWithPriorityInheritanceInfo>,
+          public Common::IntrusiveListBaseNode<LockWithPriorityInheritanceInfo> {
     public:
         explicit LockWithPriorityInheritanceInfo(KernelCore&) {}
 
@@ -839,7 +840,7 @@
 
 private:
     using LockWithPriorityInheritanceInfoList =
-        boost::intrusive::list<LockWithPriorityInheritanceInfo>;
+        Common::IntrusiveListBaseTraits<LockWithPriorityInheritanceInfo>::ListType;
 
     ConditionVariableThreadTree* m_condvar_tree{};
     u64 m_condvar_key{};
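
Like boost::intrusive::list, the replacement also supports constant-time splicing, so a whole range of linked elements can be moved between lists without per-element work. A small sketch under hypothetical names (Task, Drain; not from this commit):

#include "common/intrusive_list.h"

namespace {

// Hypothetical element type.
struct Task : Common::IntrusiveListBaseNode<Task> {
    int priority{};
};

using TaskList = Common::IntrusiveListBaseTraits<Task>::ListType;

// Moves every task from 'pending' onto the tail of 'active'. splice()
// relinks the endpoints of the range with a fixed number of pointer
// updates; nothing is copied and nothing is allocated.
void Drain(TaskList& pending, TaskList& active) {
    active.splice(active.end(), pending);
    // 'pending' is now empty; all tasks live in 'active'.
}

} // namespace
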
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 0cd87a48f..fee510f7b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
 }
 
 void EmitSetSampleMask(EmitContext& ctx, Id value) {
-    ctx.OpStore(ctx.sample_mask, value);
+    const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
+    ctx.OpStore(pointer, value);
 }
 
 void EmitSetFragDepth(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index d48d4860e..47739794f 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
             Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
         }
         if (info.stores_sample_mask) {
-            sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+            const Id array_type{TypeArray(U32[1], Const(1U))};
+            sample_mask = DefineOutput(*this, array_type, std::nullopt);
             Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
         }
         break;
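
These two shader_recompiler hunks are halves of one fix. SPIR-V requires a variable decorated with BuiltIn SampleMask to be an array of 32-bit integers (one element per group of 32 samples, mirroring GLSL's gl_SampleMask[]), not a scalar. The output is therefore declared as a one-element array of U32, and EmitSetSampleMask now stores through an OpAccessChain to element 0 instead of writing the variable directly.
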
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 39b774c98..1e158f375 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_executable(tests
     core/core_timing.cpp
     core/internal_network/network.cpp
     precompiled_headers.h
-    video_core/buffer_base.cpp
+    video_core/memory_tracker.cpp
     input_common/calibration_configuration_job.cpp
 )
 
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp deleted file mode 100644 index 734dbf4b6..000000000 --- a/src/tests/video_core/buffer_base.cpp +++ /dev/null | |||
| @@ -1,549 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include <catch2/catch_test_macros.hpp> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | using VideoCommon::BufferBase; | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | |||
| 20 | constexpr VAddr c = 0x1328914000; | ||
| 21 | |||
| 22 | class RasterizerInterface { | ||
| 23 | public: | ||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 25 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 26 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 27 | Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 29 | int& value = page_table[page]; | ||
| 30 | value += delta; | ||
| 31 | if (value < 0) { | ||
| 32 | throw std::logic_error{"negative page"}; | ||
| 33 | } | ||
| 34 | if (value == 0) { | ||
| 35 | page_table.erase(page); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 41 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 42 | return it == page_table.end() ? 0 : it->second; | ||
| 43 | } | ||
| 44 | |||
| 45 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 46 | unsigned count = 0; | ||
| 47 | for (const auto& [index, value] : page_table) { | ||
| 48 | count += value; | ||
| 49 | } | ||
| 50 | return count; | ||
| 51 | } | ||
| 52 | |||
| 53 | private: | ||
| 54 | std::unordered_map<u64, int> page_table; | ||
| 55 | }; | ||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | TEST_CASE("BufferBase: Small buffer", "[video_core]") { | ||
| 59 | RasterizerInterface rasterizer; | ||
| 60 | BufferBase buffer(rasterizer, c, WORD); | ||
| 61 | REQUIRE(rasterizer.Count() == 0); | ||
| 62 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 63 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 64 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 65 | |||
| 66 | buffer.MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 67 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2}); | ||
| 68 | } | ||
| 69 | |||
| 70 | TEST_CASE("BufferBase: Large buffer", "[video_core]") { | ||
| 71 | RasterizerInterface rasterizer; | ||
| 72 | BufferBase buffer(rasterizer, c, WORD * 32); | ||
| 73 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 74 | buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 75 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2}); | ||
| 76 | REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8}); | ||
| 77 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE}); | ||
| 78 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE}); | ||
| 79 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 80 | Range{WORD * 3 + PAGE * 63, WORD * 4}); | ||
| 81 | |||
| 82 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 83 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 84 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 85 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9}); | ||
| 86 | |||
| 87 | buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 88 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 89 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7}); | ||
| 90 | |||
| 91 | buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 92 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32}); | ||
| 93 | |||
| 94 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 95 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 96 | |||
| 97 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 98 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 99 | } | ||
| 100 | |||
| 101 | TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") { | ||
| 102 | RasterizerInterface rasterizer; | ||
| 103 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 104 | REQUIRE(rasterizer.Count() == 0); | ||
| 105 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 106 | REQUIRE(rasterizer.Count() == 1); | ||
| 107 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 108 | REQUIRE(rasterizer.Count() == 0); | ||
| 109 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 110 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 111 | REQUIRE(rasterizer.Count() == 2); | ||
| 112 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 113 | REQUIRE(rasterizer.Count() == 0); | ||
| 114 | } | ||
| 115 | |||
| 116 | TEST_CASE("BufferBase: Basic range", "[video_core]") { | ||
| 117 | RasterizerInterface rasterizer; | ||
| 118 | BufferBase buffer(rasterizer, c, WORD); | ||
| 119 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 120 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 121 | int num = 0; | ||
| 122 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 123 | REQUIRE(offset == 0U); | ||
| 124 | REQUIRE(size == PAGE); | ||
| 125 | ++num; | ||
| 126 | }); | ||
| 127 | REQUIRE(num == 1U); | ||
| 128 | } | ||
| 129 | |||
| 130 | TEST_CASE("BufferBase: Border upload", "[video_core]") { | ||
| 131 | RasterizerInterface rasterizer; | ||
| 132 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 133 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 134 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 135 | buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 136 | REQUIRE(offset == WORD - PAGE); | ||
| 137 | REQUIRE(size == PAGE * 2); | ||
| 138 | }); | ||
| 139 | } | ||
| 140 | |||
| 141 | TEST_CASE("BufferBase: Border upload range", "[video_core]") { | ||
| 142 | RasterizerInterface rasterizer; | ||
| 143 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 144 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 145 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 146 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 147 | REQUIRE(offset == WORD - PAGE); | ||
| 148 | REQUIRE(size == PAGE * 2); | ||
| 149 | }); | ||
| 150 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 151 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 152 | REQUIRE(offset == WORD - PAGE); | ||
| 153 | REQUIRE(size == PAGE); | ||
| 154 | }); | ||
| 155 | buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 156 | REQUIRE(offset == WORD); | ||
| 157 | REQUIRE(size == PAGE); | ||
| 158 | }); | ||
| 159 | } | ||
| 160 | |||
| 161 | TEST_CASE("BufferBase: Border upload partial range", "[video_core]") { | ||
| 162 | RasterizerInterface rasterizer; | ||
| 163 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 164 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 165 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 166 | buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 167 | REQUIRE(offset == WORD - PAGE); | ||
| 168 | REQUIRE(size == PAGE * 2); | ||
| 169 | }); | ||
| 170 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 171 | buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 172 | REQUIRE(offset == WORD - PAGE); | ||
| 173 | REQUIRE(size == PAGE); | ||
| 174 | }); | ||
| 175 | buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 176 | REQUIRE(offset == WORD); | ||
| 177 | REQUIRE(size == PAGE); | ||
| 178 | }); | ||
| 179 | } | ||
| 180 | |||
| 181 | TEST_CASE("BufferBase: Partial word uploads", "[video_core]") { | ||
| 182 | RasterizerInterface rasterizer; | ||
| 183 | BufferBase buffer(rasterizer, c, 0x9d000); | ||
| 184 | int num = 0; | ||
| 185 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 186 | REQUIRE(offset == 0U); | ||
| 187 | REQUIRE(size == WORD); | ||
| 188 | ++num; | ||
| 189 | }); | ||
| 190 | REQUIRE(num == 1); | ||
| 191 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 192 | REQUIRE(offset == WORD); | ||
| 193 | REQUIRE(size == WORD); | ||
| 194 | ++num; | ||
| 195 | }); | ||
| 196 | REQUIRE(num == 2); | ||
| 197 | buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 198 | REQUIRE(offset == WORD * 2); | ||
| 199 | REQUIRE(size == PAGE * 0x1d); | ||
| 200 | ++num; | ||
| 201 | }); | ||
| 202 | REQUIRE(num == 3); | ||
| 203 | } | ||
| 204 | |||
| 205 | TEST_CASE("BufferBase: Partial page upload", "[video_core]") { | ||
| 206 | RasterizerInterface rasterizer; | ||
| 207 | BufferBase buffer(rasterizer, c, WORD); | ||
| 208 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 209 | int num = 0; | ||
| 210 | buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 211 | buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 212 | buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 213 | REQUIRE(offset == PAGE * 2); | ||
| 214 | REQUIRE(size == PAGE); | ||
| 215 | ++num; | ||
| 216 | }); | ||
| 217 | REQUIRE(num == 1); | ||
| 218 | buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 219 | REQUIRE(offset == PAGE * 9); | ||
| 220 | REQUIRE(size == PAGE); | ||
| 221 | ++num; | ||
| 222 | }); | ||
| 223 | REQUIRE(num == 2); | ||
| 224 | } | ||
| 225 | |||
| 226 | TEST_CASE("BufferBase: Partial page upload with multiple words on the right") { | ||
| 227 | RasterizerInterface rasterizer; | ||
| 228 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 229 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 230 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 231 | int num = 0; | ||
| 232 | buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 233 | REQUIRE(offset == PAGE * 13); | ||
| 234 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 235 | ++num; | ||
| 236 | }); | ||
| 237 | REQUIRE(num == 1); | ||
| 238 | buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 239 | REQUIRE(offset == WORD * 7 + PAGE * 10); | ||
| 240 | REQUIRE(size == PAGE * 3); | ||
| 241 | ++num; | ||
| 242 | }); | ||
| 243 | REQUIRE(num == 2); | ||
| 244 | } | ||
| 245 | |||
| 246 | TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 247 | RasterizerInterface rasterizer; | ||
| 248 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 249 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 250 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 251 | int num = 0; | ||
| 252 | buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 253 | REQUIRE(offset == PAGE * 16); | ||
| 254 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 255 | ++num; | ||
| 256 | }); | ||
| 257 | REQUIRE(num == 1); | ||
| 258 | buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 259 | REQUIRE(offset == PAGE * 13); | ||
| 260 | REQUIRE(size == PAGE * 3); | ||
| 261 | ++num; | ||
| 262 | }); | ||
| 263 | REQUIRE(num == 2); | ||
| 264 | } | ||
| 265 | |||
| 266 | TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 267 | RasterizerInterface rasterizer; | ||
| 268 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 269 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 270 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 271 | int num = 0; | ||
| 272 | buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 273 | REQUIRE(offset == PAGE * 16); | ||
| 274 | REQUIRE(size == WORD); | ||
| 275 | ++num; | ||
| 276 | }); | ||
| 277 | REQUIRE(num == 1); | ||
| 278 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 279 | REQUIRE(offset == PAGE * 13); | ||
| 280 | REQUIRE(size == PAGE * 3); | ||
| 281 | ++num; | ||
| 282 | }); | ||
| 283 | REQUIRE(num == 2); | ||
| 284 | buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 285 | REQUIRE(offset == WORD + PAGE * 16); | ||
| 286 | REQUIRE(size == PAGE * 73); | ||
| 287 | ++num; | ||
| 288 | }); | ||
| 289 | REQUIRE(num == 3); | ||
| 290 | } | ||
| 291 | |||
| 292 | TEST_CASE("BufferBase: Empty right bits", "[video_core]") { | ||
| 293 | RasterizerInterface rasterizer; | ||
| 294 | BufferBase buffer(rasterizer, c, WORD * 2048); | ||
| 295 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 296 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 297 | buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 298 | REQUIRE(offset == WORD - PAGE); | ||
| 299 | REQUIRE(size == PAGE * 2); | ||
| 300 | }); | ||
| 301 | } | ||
| 302 | |||
| 303 | TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") { | ||
| 304 | RasterizerInterface rasterizer; | ||
| 305 | BufferBase buffer(rasterizer, c, WORD); | ||
| 306 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 307 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 308 | int num = 0; | ||
| 309 | buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 310 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 311 | buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 312 | REQUIRE(num == 0); | ||
| 313 | buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 314 | REQUIRE(num == 1); | ||
| 315 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 316 | REQUIRE(rasterizer.Count() == 0); | ||
| 317 | } | ||
| 318 | |||
| 319 | TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") { | ||
| 320 | RasterizerInterface rasterizer; | ||
| 321 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 322 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 323 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 324 | REQUIRE(rasterizer.Count() == 0); | ||
| 325 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 326 | REQUIRE(rasterizer.Count() == 1); | ||
| 327 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2)); | ||
| 328 | buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2); | ||
| 329 | buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2); | ||
| 330 | REQUIRE(rasterizer.Count() == 2); | ||
| 331 | } | ||
| 332 | |||
| 333 | TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") { | ||
| 334 | RasterizerInterface rasterizer; | ||
| 335 | BufferBase buffer(rasterizer, c, 0x310720); | ||
| 336 | buffer.UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 337 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 338 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 339 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 340 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 341 | } | ||
| 342 | |||
| 343 | TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") { | ||
| 344 | RasterizerInterface rasterizer; | ||
| 345 | BufferBase buffer(rasterizer, c, WORD); | ||
| 346 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 347 | buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 348 | buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 349 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 350 | static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3}; | ||
| 351 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 352 | REQUIRE(offset == offsets.at(i)); | ||
| 353 | REQUIRE(size == sizes.at(i)); | ||
| 354 | ++i; | ||
| 355 | }); | ||
| 356 | } | ||
| 357 | |||
| 358 | TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") { | ||
| 359 | RasterizerInterface rasterizer; | ||
| 360 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 361 | buffer.UnmarkRegionAsCpuModified(c, 0x22000); | ||
| 362 | REQUIRE(rasterizer.Count() == 0x22); | ||
| 363 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 364 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 365 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 366 | static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21}; | ||
| 367 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE}; | ||
| 368 | REQUIRE(offset == offsets.at(i)); | ||
| 369 | REQUIRE(size == sizes.at(i)); | ||
| 370 | ++i; | ||
| 371 | }); | ||
| 372 | } | ||
| 373 | |||
| 374 | TEST_CASE("BufferBase: Single page modified range", "[video_core]") { | ||
| 375 | RasterizerInterface rasterizer; | ||
| 376 | BufferBase buffer(rasterizer, c, PAGE); | ||
| 377 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 378 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 379 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("BufferBase: Two page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 385 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 386 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 387 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2)); | ||
| 388 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 389 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 390 | } | ||
| 391 | |||
| 392 | TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") { | ||
| 393 | for (int offset = 0; offset < 4; ++offset) { | ||
| 394 | const VAddr address = c + WORD * offset; | ||
| 395 | RasterizerInterface rasterizer; | ||
| 396 | BufferBase buffer(rasterizer, address, WORD * 4); | ||
| 397 | REQUIRE(buffer.IsRegionCpuModified(address, PAGE)); | ||
| 398 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 399 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 400 | |||
| 401 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 402 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD)); | ||
| 403 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 404 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 405 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 406 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 407 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 408 | |||
| 409 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 410 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 411 | } | ||
| 412 | } | ||
| 413 | |||
| 414 | TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") { | ||
| 415 | RasterizerInterface rasterizer; | ||
| 416 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 417 | buffer.UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 418 | REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 419 | |||
| 420 | buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 421 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 422 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 423 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 424 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 425 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 426 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 427 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 428 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 429 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 430 | } | ||
| 431 | |||
| 432 | TEST_CASE("BufferBase: Out of bounds region query") { | ||
| 433 | RasterizerInterface rasterizer; | ||
| 434 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 435 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE)); | ||
| 436 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE)); | ||
| 437 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE)); | ||
| 438 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64)); | ||
| 439 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64)); | ||
| 440 | } | ||
| 441 | |||
| 442 | TEST_CASE("BufferBase: Wrap word regions") { | ||
| 443 | RasterizerInterface rasterizer; | ||
| 444 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 445 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 446 | buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 447 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2)); | ||
| 448 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 449 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 450 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 451 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 452 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 453 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 454 | |||
| 455 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 457 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 458 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 459 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 460 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 461 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 462 | } | ||
| 463 | |||
| 464 | TEST_CASE("BufferBase: Unaligned page region query") { | ||
| 465 | RasterizerInterface rasterizer; | ||
| 466 | BufferBase buffer(rasterizer, c, WORD); | ||
| 467 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 468 | buffer.MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 469 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 470 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 471 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); | ||
| 472 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); | ||
| 473 | } | ||
| 474 | |||
| 475 | TEST_CASE("BufferBase: Cached write") { | ||
| 476 | RasterizerInterface rasterizer; | ||
| 477 | BufferBase buffer(rasterizer, c, WORD); | ||
| 478 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 479 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 480 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | buffer.FlushCachedWrites(); | ||
| 482 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 483 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 484 | REQUIRE(rasterizer.Count() == 0); | ||
| 485 | } | ||
| 486 | |||
| 487 | TEST_CASE("BufferBase: Multiple cached write") { | ||
| 488 | RasterizerInterface rasterizer; | ||
| 489 | BufferBase buffer(rasterizer, c, WORD); | ||
| 490 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 491 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 492 | buffer.CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 493 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 494 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 495 | buffer.FlushCachedWrites(); | ||
| 496 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 497 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 498 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 499 | REQUIRE(rasterizer.Count() == 0); | ||
| 500 | } | ||
| 501 | |||
| 502 | TEST_CASE("BufferBase: Cached write unmarked") { | ||
| 503 | RasterizerInterface rasterizer; | ||
| 504 | BufferBase buffer(rasterizer, c, WORD); | ||
| 505 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 506 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 507 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 508 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | buffer.FlushCachedWrites(); | ||
| 510 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 511 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 512 | REQUIRE(rasterizer.Count() == 0); | ||
| 513 | } | ||
| 514 | |||
| 515 | TEST_CASE("BufferBase: Cached write iterated") { | ||
| 516 | RasterizerInterface rasterizer; | ||
| 517 | BufferBase buffer(rasterizer, c, WORD); | ||
| 518 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 519 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 520 | int num = 0; | ||
| 521 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 522 | REQUIRE(num == 0); | ||
| 523 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | buffer.FlushCachedWrites(); | ||
| 525 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 526 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 527 | REQUIRE(rasterizer.Count() == 0); | ||
| 528 | } | ||
| 529 | |||
| 530 | TEST_CASE("BufferBase: Cached write downloads") { | ||
| 531 | RasterizerInterface rasterizer; | ||
| 532 | BufferBase buffer(rasterizer, c, WORD); | ||
| 533 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 534 | REQUIRE(rasterizer.Count() == 64); | ||
| 535 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 536 | REQUIRE(rasterizer.Count() == 63); | ||
| 537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 538 | int num = 0; | ||
| 539 | buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | buffer.FlushCachedWrites(); | ||
| 545 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } | ||
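The cached-write tests above all exercise the same deferred-dirtying contract: CachedCpuWrite stops rasterizer tracking immediately but defers the CPU-dirty bit until FlushCachedWrites. A minimal sketch of that sequence, reusing the fixture and the PAGE/WORD constants defined earlier in this test file:

    RasterizerInterface rasterizer;
    BufferBase buffer(rasterizer, c, WORD);
    buffer.UnmarkRegionAsCpuModified(c, WORD); // rasterizer now tracks all 64 pages
    buffer.CachedCpuWrite(c + PAGE, PAGE);     // page leaves tracking but is not yet dirty
    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
    buffer.FlushCachedWrites();                // the deferred write becomes CPU-modified
    REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));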
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp new file mode 100644 index 000000000..3981907a2 --- /dev/null +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -0,0 +1,549 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <memory> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include <catch2/catch_test_macros.hpp> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | constexpr u64 HIGH_PAGE_BITS = 22; | ||
| 20 | constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS; | ||
| 21 | |||
| 22 | constexpr VAddr c = 16 * HIGH_PAGE_SIZE; | ||
| 23 | |||
| 24 | class RasterizerInterface { | ||
| 25 | public: | ||
| 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 29 | Core::Memory::YUZU_PAGEBITS}; | ||
| 30 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 31 | int& value = page_table[page]; | ||
| 32 | value += delta; | ||
| 33 | if (value < 0) { | ||
| 34 | throw std::logic_error{"negative page"}; | ||
| 35 | } | ||
| 36 | if (value == 0) { | ||
| 37 | page_table.erase(page); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 43 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 44 | return it == page_table.end() ? 0 : it->second; | ||
| 45 | } | ||
| 46 | |||
| 47 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 48 | unsigned count = 0; | ||
| 49 | for (const auto& [index, value] : page_table) { | ||
| 50 | count += value; | ||
| 51 | } | ||
| 52 | return count; | ||
| 53 | } | ||
| 54 | |||
| 55 | private: | ||
| 56 | std::unordered_map<u64, int> page_table; | ||
| 57 | }; | ||
| 58 | } // Anonymous namespace | ||
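The mock above reduces rasterizer interaction to a per-page reference count: UpdatePagesCachedCount applies a +1/-1 delta to every 4 KiB page a range touches, erases entries that reach zero, and throws on a negative count so unbalanced track/untrack calls fail loudly. A short sketch of that invariant, using only the definitions above:

    RasterizerInterface rasterizer;
    rasterizer.UpdatePagesCachedCount(c, PAGE * 2, 1);  // pages c and c + PAGE -> count 1
    rasterizer.UpdatePagesCachedCount(c, PAGE, -1);     // page c -> count 0, entry erased
    REQUIRE(rasterizer.Count() == 1);                   // only c + PAGE remains tracked
    // rasterizer.UpdatePagesCachedCount(c, PAGE, -1);  // would throw std::logic_error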
| 59 | |||
| 60 | using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>; | ||
| 61 | |||
| 62 | TEST_CASE("MemoryTracker: Small region", "[video_core]") { | ||
| 63 | RasterizerInterface rasterizer; | ||
| 64 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 65 | REQUIRE(rasterizer.Count() == 0); | ||
| 66 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 67 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 68 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 69 | |||
| 70 | memory_track->MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 71 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2}); | ||
| 72 | } | ||
| 73 | |||
| 74 | TEST_CASE("MemoryTracker: Large region", "[video_core]") { | ||
| 75 | RasterizerInterface rasterizer; | ||
| 76 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 77 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 78 | memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 79 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) == | ||
| 80 | Range{c + PAGE, c + WORD + PAGE * 2}); | ||
| 81 | REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == | ||
| 82 | Range{c + PAGE * 2, c + PAGE * 8}); | ||
| 83 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE}); | ||
| 84 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) == | ||
| 85 | Range{c + WORD * 4, c + WORD * 4 + PAGE}); | ||
| 86 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 87 | Range{c + WORD * 3 + PAGE * 63, c + WORD * 4}); | ||
| 88 | |||
| 89 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 90 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 91 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 92 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9}); | ||
| 93 | |||
| 94 | memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 95 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 96 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7}); | ||
| 97 | |||
| 98 | memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 99 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32}); | ||
| 100 | |||
| 101 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 102 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 103 | |||
| 104 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 105 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 106 | } | ||
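The Range pairs checked above follow from clipping the dirty span to each query window at page granularity. A worked restatement of the first REQUIRE in this test, under that clipping assumption (std::max/std::min from &lt;algorithm&gt;):

    const VAddr dirty_begin = c + PAGE;          // MarkRegionAsCpuModified(c + 4096, WORD * 4)
    const VAddr dirty_end = c + PAGE + WORD * 4;
    const VAddr query_end = c + WORD + PAGE * 2; // ModifiedCpuRegion(c, WORD + PAGE * 2)
    REQUIRE(Range{std::max(c, dirty_begin), std::min(query_end, dirty_end)} ==
            Range{c + PAGE, c + WORD + PAGE * 2});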
| 107 | |||
| 108 | TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") { | ||
| 109 | RasterizerInterface rasterizer; | ||
| 110 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 111 | REQUIRE(rasterizer.Count() == 0); | ||
| 112 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 113 | REQUIRE(rasterizer.Count() == 1); | ||
| 114 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 115 | REQUIRE(rasterizer.Count() == 0); | ||
| 116 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 117 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 118 | REQUIRE(rasterizer.Count() == 2); | ||
| 119 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 120 | REQUIRE(rasterizer.Count() == 0); | ||
| 121 | } | ||
| 122 | |||
| 123 | TEST_CASE("MemoryTracker: Basic range", "[video_core]") { | ||
| 124 | RasterizerInterface rasterizer; | ||
| 125 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 126 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 127 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 128 | int num = 0; | ||
| 129 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 130 | REQUIRE(offset == c); | ||
| 131 | REQUIRE(size == PAGE); | ||
| 132 | ++num; | ||
| 133 | }); | ||
| 134 | REQUIRE(num == 1U); | ||
| 135 | } | ||
| 136 | |||
| 137 | TEST_CASE("MemoryTracker: Border upload", "[video_core]") { | ||
| 138 | RasterizerInterface rasterizer; | ||
| 139 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 140 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 141 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 142 | memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 143 | REQUIRE(offset == c + WORD - PAGE); | ||
| 144 | REQUIRE(size == PAGE * 2); | ||
| 145 | }); | ||
| 146 | } | ||
| 147 | |||
| 148 | TEST_CASE("MemoryTracker: Border upload range", "[video_core]") { | ||
| 149 | RasterizerInterface rasterizer; | ||
| 150 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 151 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 152 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 153 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 154 | REQUIRE(offset == c + WORD - PAGE); | ||
| 155 | REQUIRE(size == PAGE * 2); | ||
| 156 | }); | ||
| 157 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 158 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 159 | REQUIRE(offset == c + WORD - PAGE); | ||
| 160 | REQUIRE(size == PAGE); | ||
| 161 | }); | ||
| 162 | memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 163 | REQUIRE(offset == c + WORD); | ||
| 164 | REQUIRE(size == PAGE); | ||
| 165 | }); | ||
| 166 | } | ||
| 167 | |||
| 168 | TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") { | ||
| 169 | RasterizerInterface rasterizer; | ||
| 170 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 171 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 172 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 173 | memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 174 | REQUIRE(offset == c + WORD - PAGE); | ||
| 175 | REQUIRE(size == PAGE * 2); | ||
| 176 | }); | ||
| 177 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 178 | memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 179 | REQUIRE(offset == c + WORD - PAGE); | ||
| 180 | REQUIRE(size == PAGE); | ||
| 181 | }); | ||
| 182 | memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 183 | REQUIRE(offset == c + WORD); | ||
| 184 | REQUIRE(size == PAGE); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") { | ||
| 189 | RasterizerInterface rasterizer; | ||
| 190 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 191 | int num = 0; | ||
| 192 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 193 | REQUIRE(offset == c); | ||
| 194 | REQUIRE(size == WORD); | ||
| 195 | ++num; | ||
| 196 | }); | ||
| 197 | REQUIRE(num == 1); | ||
| 198 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 199 | REQUIRE(offset == c + WORD); | ||
| 200 | REQUIRE(size == WORD); | ||
| 201 | ++num; | ||
| 202 | }); | ||
| 203 | REQUIRE(num == 2); | ||
| 204 | memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 205 | REQUIRE(offset == c + WORD * 2); | ||
| 206 | REQUIRE(size == PAGE * 0x1d); | ||
| 207 | ++num; | ||
| 208 | }); | ||
| 209 | REQUIRE(num == 3); | ||
| 210 | } | ||
| 211 | |||
| 212 | TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") { | ||
| 213 | RasterizerInterface rasterizer; | ||
| 214 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 215 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 216 | int num = 0; | ||
| 217 | memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 218 | memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 219 | memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 220 | REQUIRE(offset == c + PAGE * 2); | ||
| 221 | REQUIRE(size == PAGE); | ||
| 222 | ++num; | ||
| 223 | }); | ||
| 224 | REQUIRE(num == 1); | ||
| 225 | memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 226 | REQUIRE(offset == c + PAGE * 9); | ||
| 227 | REQUIRE(size == PAGE); | ||
| 228 | ++num; | ||
| 229 | }); | ||
| 230 | REQUIRE(num == 2); | ||
| 231 | } | ||
| 232 | |||
| 233 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right") { | ||
| 234 | RasterizerInterface rasterizer; | ||
| 235 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 236 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 9); | ||
| 237 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 238 | int num = 0; | ||
| 239 | memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 240 | REQUIRE(offset == c + PAGE * 13); | ||
| 241 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 242 | ++num; | ||
| 243 | }); | ||
| 244 | REQUIRE(num == 1); | ||
| 245 | memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 246 | REQUIRE(offset == c + WORD * 7 + PAGE * 10); | ||
| 247 | REQUIRE(size == PAGE * 3); | ||
| 248 | ++num; | ||
| 249 | }); | ||
| 250 | REQUIRE(num == 2); | ||
| 251 | } | ||
| 252 | |||
| 253 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 254 | RasterizerInterface rasterizer; | ||
| 255 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 256 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 257 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 258 | int num = 0; | ||
| 259 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 260 | REQUIRE(offset == c + PAGE * 16); | ||
| 261 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 262 | ++num; | ||
| 263 | }); | ||
| 264 | REQUIRE(num == 1); | ||
| 265 | memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 266 | REQUIRE(offset == c + PAGE * 13); | ||
| 267 | REQUIRE(size == PAGE * 3); | ||
| 268 | ++num; | ||
| 269 | }); | ||
| 270 | REQUIRE(num == 2); | ||
| 271 | } | ||
| 272 | |||
| 273 | TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 274 | RasterizerInterface rasterizer; | ||
| 275 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 276 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 277 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 278 | int num = 0; | ||
| 279 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 280 | REQUIRE(offset == c + PAGE * 16); | ||
| 281 | REQUIRE(size == WORD); | ||
| 282 | ++num; | ||
| 283 | }); | ||
| 284 | REQUIRE(num == 1); | ||
| 285 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 286 | REQUIRE(offset == c + PAGE * 13); | ||
| 287 | REQUIRE(size == PAGE * 3); | ||
| 288 | ++num; | ||
| 289 | }); | ||
| 290 | REQUIRE(num == 2); | ||
| 291 | memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 292 | REQUIRE(offset == c + WORD + PAGE * 16); | ||
| 293 | REQUIRE(size == PAGE * 73); | ||
| 294 | ++num; | ||
| 295 | }); | ||
| 296 | REQUIRE(num == 3); | ||
| 297 | } | ||
| 298 | |||
| 299 | TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") { | ||
| 300 | RasterizerInterface rasterizer; | ||
| 301 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 302 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 303 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 304 | memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 305 | REQUIRE(offset == c + WORD - PAGE); | ||
| 306 | REQUIRE(size == PAGE * 2); | ||
| 307 | }); | ||
| 308 | } | ||
| 309 | |||
| 310 | TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") { | ||
| 311 | RasterizerInterface rasterizer; | ||
| 312 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 313 | memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD); | ||
| 314 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 315 | REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE); | ||
| 316 | int num = 0; | ||
| 317 | memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 318 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 319 | memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 320 | REQUIRE(num == 0); | ||
| 321 | memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 322 | REQUIRE(num == 1); | ||
| 323 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 324 | REQUIRE(rasterizer.Count() == 2 * WORD / PAGE); | ||
| 325 | } | ||
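Out-of-range queries above are clamped to the tracked span rather than rejected, so only the overlap of the query with dirty, tracked memory is visited. A compact restatement of the one-hit case, assuming the same clamping semantics:

    // Of [c - PAGE, c + PAGE), only the page at c is dirty,
    // so exactly one range is reported:
    memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) {
        REQUIRE(offset == c);
        REQUIRE(size == PAGE);
    });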
| 326 | |||
| 327 | TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") { | ||
| 328 | RasterizerInterface rasterizer; | ||
| 329 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 330 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 331 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 332 | REQUIRE(rasterizer.Count() == 2); | ||
| 333 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 334 | REQUIRE(rasterizer.Count() == 3); | ||
| 335 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2)); | ||
| 336 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2); | ||
| 337 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2); | ||
| 338 | REQUIRE(rasterizer.Count() == 7); | ||
| 339 | } | ||
| 340 | |||
| 341 | TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") { | ||
| 342 | RasterizerInterface rasterizer; | ||
| 343 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 344 | memory_track->UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 345 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 346 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 347 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 348 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 349 | } | ||
| 350 | |||
| 351 | TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") { | ||
| 352 | RasterizerInterface rasterizer; | ||
| 353 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 354 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 355 | memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 356 | memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 357 | memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 358 | static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3}; | ||
| 359 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 360 | REQUIRE(offset == offsets.at(i)); | ||
| 361 | REQUIRE(size == sizes.at(i)); | ||
| 362 | ++i; | ||
| 363 | }); | ||
| 364 | } | ||
| 365 | |||
| 366 | TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") { | ||
| 367 | RasterizerInterface rasterizer; | ||
| 368 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 369 | memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23); | ||
| 370 | REQUIRE(rasterizer.Count() == 0x23); | ||
| 371 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 372 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 373 | memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable { | ||
| 374 | static constexpr std::array<u64, 3> offsets{c + PAGE * 0x1B, c + PAGE * 0x21}; | ||
| 375 | static constexpr std::array<u64, 3> sizes{PAGE, PAGE}; | ||
| 376 | REQUIRE(offset == offsets.at(i)); | ||
| 377 | REQUIRE(size == sizes.at(i)); | ||
| 378 | ++i; | ||
| 379 | }); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 385 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 386 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 387 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 388 | } | ||
| 389 | |||
| 390 | TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") { | ||
| 391 | RasterizerInterface rasterizer; | ||
| 392 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 393 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 394 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 395 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2)); | ||
| 396 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 397 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 398 | } | ||
| 399 | |||
| 400 | TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") { | ||
| 401 | for (int offset = 0; offset < 4; ++offset) { | ||
| 402 | const VAddr address = c + WORD * offset; | ||
| 403 | RasterizerInterface rasterizer; | ||
| 404 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 405 | REQUIRE(memory_track->IsRegionCpuModified(address, PAGE)); | ||
| 406 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 407 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 408 | |||
| 409 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 410 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD)); | ||
| 411 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 412 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 413 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 414 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 415 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 416 | |||
| 417 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 418 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") { | ||
| 423 | RasterizerInterface rasterizer; | ||
| 424 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 425 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 426 | REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 427 | |||
| 428 | memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 429 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 430 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 431 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 432 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 433 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 434 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 435 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 436 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 437 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 438 | } | ||
| 439 | |||
| 440 | TEST_CASE("MemoryTracker: Wrap word regions") { | ||
| 441 | RasterizerInterface rasterizer; | ||
| 442 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 443 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 444 | memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 445 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2)); | ||
| 446 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 447 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 448 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 449 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 450 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 451 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 452 | |||
| 453 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 454 | memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 455 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 457 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 458 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 459 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 460 | } | ||
| 461 | |||
| 462 | TEST_CASE("MemoryTracker: Unaligned page region query") { | ||
| 463 | RasterizerInterface rasterizer; | ||
| 464 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 465 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 466 | memory_track->MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 467 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 468 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 469 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000)); | ||
| 470 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1)); | ||
| 471 | } | ||
| 472 | |||
| 473 | TEST_CASE("MemoryTracker: Cached write") { | ||
| 474 | RasterizerInterface rasterizer; | ||
| 475 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 476 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 477 | memory_track->CachedCpuWrite(c + PAGE, c + PAGE); | ||
| 478 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 479 | memory_track->FlushCachedWrites(); | ||
| 480 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 482 | REQUIRE(rasterizer.Count() == 0); | ||
| 483 | } | ||
| 484 | |||
| 485 | TEST_CASE("MemoryTracker: Multiple cached write") { | ||
| 486 | RasterizerInterface rasterizer; | ||
| 487 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 488 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 489 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 490 | memory_track->CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 491 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 492 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 493 | memory_track->FlushCachedWrites(); | ||
| 494 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 495 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 496 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 497 | REQUIRE(rasterizer.Count() == 0); | ||
| 498 | } | ||
| 499 | |||
| 500 | TEST_CASE("MemoryTracker: Cached write unmarked") { | ||
| 501 | RasterizerInterface rasterizer; | ||
| 502 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 503 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 504 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 505 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 506 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 507 | memory_track->FlushCachedWrites(); | ||
| 508 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 510 | REQUIRE(rasterizer.Count() == 0); | ||
| 511 | } | ||
| 512 | |||
| 513 | TEST_CASE("MemoryTracker: Cached write iterated") { | ||
| 514 | RasterizerInterface rasterizer; | ||
| 515 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 516 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 517 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 518 | int num = 0; | ||
| 519 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 520 | REQUIRE(num == 0); | ||
| 521 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 522 | memory_track->FlushCachedWrites(); | ||
| 523 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 525 | REQUIRE(rasterizer.Count() == 0); | ||
| 526 | } | ||
| 527 | |||
| 528 | TEST_CASE("MemoryTracker: Cached write downloads") { | ||
| 529 | RasterizerInterface rasterizer; | ||
| 530 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 531 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 532 | REQUIRE(rasterizer.Count() == 64); | ||
| 533 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 534 | REQUIRE(rasterizer.Count() == 63); | ||
| 535 | memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 536 | int num = 0; | ||
| 537 | memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 538 | REQUIRE(num == 1); | ||
| 539 | num = 0; | ||
| 540 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | memory_track->FlushCachedWrites(); | ||
| 545 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } \ No newline at end of file | ||
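The expected counts throughout this file fall out of the constants declared at its top; a compile-time check of that arithmetic, using only this file's definitions:

    static_assert(WORD == 64 * PAGE);            // one tracking word covers 64 pages (256 KiB)
    static_assert(HIGH_PAGE_SIZE == 16 * WORD);  // one 4 MiB high page spans 16 words
    static_assert(c % HIGH_PAGE_SIZE == 0);      // the test base address is high-page aligned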
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e904573d7..92cab93f3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -11,8 +11,11 @@ endif() | |||
| 11 | 11 | ||
| 12 | add_library(video_core STATIC | 12 | add_library(video_core STATIC |
| 13 | buffer_cache/buffer_base.h | 13 | buffer_cache/buffer_base.h |
| 14 | buffer_cache/buffer_cache_base.h | ||
| 14 | buffer_cache/buffer_cache.cpp | 15 | buffer_cache/buffer_cache.cpp |
| 15 | buffer_cache/buffer_cache.h | 16 | buffer_cache/buffer_cache.h |
| 17 | buffer_cache/memory_tracker_base.h | ||
| 18 | buffer_cache/word_manager.h | ||
| 16 | cache_types.h | 19 | cache_types.h |
| 17 | cdma_pusher.cpp | 20 | cdma_pusher.cpp |
| 18 | cdma_pusher.h | 21 | cdma_pusher.h |
| @@ -104,6 +107,7 @@ add_library(video_core STATIC | |||
| 104 | renderer_null/renderer_null.h | 107 | renderer_null/renderer_null.h |
| 105 | renderer_opengl/blit_image.cpp | 108 | renderer_opengl/blit_image.cpp |
| 106 | renderer_opengl/blit_image.h | 109 | renderer_opengl/blit_image.h |
| 110 | renderer_opengl/gl_buffer_cache_base.cpp | ||
| 107 | renderer_opengl/gl_buffer_cache.cpp | 111 | renderer_opengl/gl_buffer_cache.cpp |
| 108 | renderer_opengl/gl_buffer_cache.h | 112 | renderer_opengl/gl_buffer_cache.h |
| 109 | renderer_opengl/gl_compute_pipeline.cpp | 113 | renderer_opengl/gl_compute_pipeline.cpp |
| @@ -154,6 +158,7 @@ add_library(video_core STATIC | |||
| 154 | renderer_vulkan/renderer_vulkan.cpp | 158 | renderer_vulkan/renderer_vulkan.cpp |
| 155 | renderer_vulkan/vk_blit_screen.cpp | 159 | renderer_vulkan/vk_blit_screen.cpp |
| 156 | renderer_vulkan/vk_blit_screen.h | 160 | renderer_vulkan/vk_blit_screen.h |
| 161 | renderer_vulkan/vk_buffer_cache_base.cpp | ||
| 157 | renderer_vulkan/vk_buffer_cache.cpp | 162 | renderer_vulkan/vk_buffer_cache.cpp |
| 158 | renderer_vulkan/vk_buffer_cache.h | 163 | renderer_vulkan/vk_buffer_cache.h |
| 159 | renderer_vulkan/vk_command_pool.cpp | 164 | renderer_vulkan/vk_command_pool.cpp |
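The additions above wire in the new tracking split: word_manager.h carries the bit-level page state, memory_tracker_base.h layers the two-level tracker on top, and the new *_buffer_cache_base.cpp files presumably host the per-backend instantiations of the shared buffer-cache template. A consumer only needs the tracker header, as the new test file shows:

    #include "video_core/buffer_cache/memory_tracker_base.h"

    using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>;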
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 1b4d63616..9cbd95c4b 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| @@ -11,9 +11,7 @@ | |||
| 11 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 12 | #include "common/common_funcs.h" | 12 | #include "common/common_funcs.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/div_ceil.h" | 14 | #include "video_core/buffer_cache/word_manager.h" |
| 15 | #include "common/settings.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | 15 | ||
| 18 | namespace VideoCommon { | 16 | namespace VideoCommon { |
| 19 | 17 | ||
| @@ -36,116 +34,12 @@ struct NullBufferParams {}; | |||
| 36 | */ | 34 | */ |
| 37 | template <class RasterizerInterface> | 35 | template <class RasterizerInterface> |
| 38 | class BufferBase { | 36 | class BufferBase { |
| 39 | static constexpr u64 PAGES_PER_WORD = 64; | ||
| 40 | static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | ||
| 41 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||
| 42 | |||
| 43 | /// Tightly packed storage for modified-page words, with a small-buffer optimization | ||
| 44 | union WordsArray { | ||
| 45 | /// Returns the pointer to the words state | ||
| 46 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 47 | return is_short ? &stack : heap; | ||
| 48 | } | ||
| 49 | |||
| 50 | /// Returns the pointer to the words state | ||
| 51 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 52 | return is_short ? &stack : heap; | ||
| 53 | } | ||
| 54 | |||
| 55 | u64 stack = 0; ///< Inline storage for small buffers | ||
| 56 | u64* heap; ///< Pointer to heap storage for non-small buffers | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Words { | ||
| 60 | explicit Words() = default; | ||
| 61 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||
| 62 | if (IsShort()) { | ||
| 63 | cpu.stack = ~u64{0}; | ||
| 64 | gpu.stack = 0; | ||
| 65 | cached_cpu.stack = 0; | ||
| 66 | untracked.stack = ~u64{0}; | ||
| 67 | } else { | ||
| 68 | // Share allocation between CPU and GPU pages and set their default values | ||
| 69 | const size_t num_words = NumWords(); | ||
| 70 | u64* const alloc = new u64[num_words * 4]; | ||
| 71 | cpu.heap = alloc; | ||
| 72 | gpu.heap = alloc + num_words; | ||
| 73 | cached_cpu.heap = alloc + num_words * 2; | ||
| 74 | untracked.heap = alloc + num_words * 3; | ||
| 75 | std::fill_n(cpu.heap, num_words, ~u64{0}); | ||
| 76 | std::fill_n(gpu.heap, num_words, 0); | ||
| 77 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 78 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 79 | } | ||
| 80 | // Clean up trailing bits | ||
| 81 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||
| 82 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||
| 83 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||
| 84 | const u64 last_word = (~u64{0} << shift) >> shift; | ||
| 85 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 86 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 87 | } | ||
| 88 | |||
| 89 | ~Words() { | ||
| 90 | Release(); | ||
| 91 | } | ||
| 92 | |||
| 93 | Words& operator=(Words&& rhs) noexcept { | ||
| 94 | Release(); | ||
| 95 | size_bytes = rhs.size_bytes; | ||
| 96 | cpu = rhs.cpu; | ||
| 97 | gpu = rhs.gpu; | ||
| 98 | cached_cpu = rhs.cached_cpu; | ||
| 99 | untracked = rhs.untracked; | ||
| 100 | rhs.cpu.heap = nullptr; | ||
| 101 | return *this; | ||
| 102 | } | ||
| 103 | |||
| 104 | Words(Words&& rhs) noexcept | ||
| 105 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||
| 106 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 107 | rhs.cpu.heap = nullptr; | ||
| 108 | } | ||
| 109 | |||
| 110 | Words& operator=(const Words&) = delete; | ||
| 111 | Words(const Words&) = delete; | ||
| 112 | |||
| 113 | /// Returns true when the buffer fits in the small-buffer (inline) storage | ||
| 114 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 115 | return size_bytes <= BYTES_PER_WORD; | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Returns the number of words of the buffer | ||
| 119 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 120 | return Common::DivCeil(size_bytes, BYTES_PER_WORD); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Release buffer resources | ||
| 124 | void Release() { | ||
| 125 | if (!IsShort()) { | ||
| 126 | // The CPU word array is the base of the shared heap allocation | ||
| 127 | delete[] cpu.heap; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | u64 size_bytes = 0; | ||
| 132 | WordsArray cpu; | ||
| 133 | WordsArray gpu; | ||
| 134 | WordsArray cached_cpu; | ||
| 135 | WordsArray untracked; | ||
| 136 | }; | ||
| 137 | |||
| 138 | enum class Type { | ||
| 139 | CPU, | ||
| 140 | GPU, | ||
| 141 | CachedCPU, | ||
| 142 | Untracked, | ||
| 143 | }; | ||
| 144 | |||
| 145 | public: | 37 | public: |
| 146 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) | 38 | static constexpr u64 BASE_PAGE_BITS = 16; |
| 147 | : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, | 39 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |
| 148 | words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} | 40 | |
| 41 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | ||
| 42 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | ||
| 149 | 43 | ||
| 150 | explicit BufferBase(NullBufferParams) {} | 44 | explicit BufferBase(NullBufferParams) {} |
| 151 | 45 | ||
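The hunk above strips the inline Words machinery out of BufferBase (it now lives behind word_manager.h), leaving the class as a plain address/size record plus flags; the constructor keeps its rasterizer parameter but no longer aligns the address or allocates tracking words. A hedged sketch of the slimmed surface, using only members visible in this diff:

    // Construction is now a trivial store of address and size:
    VideoCommon::BufferBase<RasterizerInterface> buffer(rasterizer, cpu_addr, size_bytes);
    buffer.Pick();                           // flag bookkeeping stays in BufferBase
    const size_t bytes = buffer.SizeBytes(); // now simply returns the stored size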
| @@ -155,100 +49,6 @@ public: | |||
| 155 | BufferBase& operator=(BufferBase&&) = default; | 49 | BufferBase& operator=(BufferBase&&) = default; |
| 156 | BufferBase(BufferBase&&) = default; | 50 | BufferBase(BufferBase&&) = default; |
| 157 | 51 | ||
| 158 | /// Returns the inclusive CPU modified range as a begin/end pair | ||
| 159 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||
| 160 | u64 query_size) const noexcept { | ||
| 161 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 162 | return ModifiedRegion<Type::CPU>(offset, query_size); | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the inclusive GPU modified range as a begin/end pair | ||
| 166 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||
| 167 | u64 query_size) const noexcept { | ||
| 168 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 169 | return ModifiedRegion<Type::GPU>(offset, query_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | /// Returns true if a region has been modified from the CPU | ||
| 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 174 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 175 | return IsRegionModified<Type::CPU>(offset, query_size); | ||
| 176 | } | ||
| 177 | |||
| 178 | /// Returns true if a region has been modified from the GPU | ||
| 179 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 180 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 181 | return IsRegionModified<Type::GPU>(offset, query_size); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// Mark region as CPU modified, notifying the rasterizer about this change | ||
| 185 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 186 | ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Unmark region as CPU modified, notifying the rasterizer about this change | ||
| 190 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 191 | ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); | ||
| 192 | } | ||
| 193 | |||
| 194 | /// Mark region as modified from the host GPU | ||
| 195 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 196 | ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); | ||
| 197 | } | ||
| 198 | |||
| 199 | /// Unmark region as modified from the host GPU | ||
| 200 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 201 | ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Mark region as modified from the CPU | ||
| 205 | /// but don't mark it as modified until FlushCachedWrites is called. | ||
| 206 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||
| 207 | flags |= BufferFlagBits::CachedWrites; | ||
| 208 | ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Flushes cached CPU writes and notifies the rasterizer about the deltas | ||
| 212 | void FlushCachedWrites() noexcept { | ||
| 213 | flags &= ~BufferFlagBits::CachedWrites; | ||
| 214 | const u64 num_words = NumWords(); | ||
| 215 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 216 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 217 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 218 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 219 | const u64 cached_bits = cached_words[word_index]; | ||
| 220 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 221 | untracked_words[word_index] |= cached_bits; | ||
| 222 | cpu_words[word_index] |= cached_bits; | ||
| 223 | if (!Settings::values.use_pessimistic_flushes) { | ||
| 224 | cached_words[word_index] = 0; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | } | ||
| 228 | |||
| 229 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | ||
| 230 | template <typename Func> | ||
| 231 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 232 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func); | ||
| 233 | } | ||
| 234 | |||
| 235 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 236 | template <typename Func> | ||
| 237 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) { | ||
| 238 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func); | ||
| 239 | } | ||
| 240 | |||
| 241 | template <typename Func> | ||
| 242 | void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 243 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func); | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 247 | template <typename Func> | ||
| 248 | void ForEachDownloadRange(Func&& func) { | ||
| 249 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func); | ||
| 250 | } | ||
| 251 | |||
| 252 | /// Mark buffer as picked | 52 | /// Mark buffer as picked |
| 253 | void Pick() noexcept { | 53 | void Pick() noexcept { |
| 254 | flags |= BufferFlagBits::Picked; | 54 | flags |= BufferFlagBits::Picked; |
| @@ -295,11 +95,6 @@ public: | |||
| 295 | return static_cast<u32>(other_cpu_addr - cpu_addr); | 95 | return static_cast<u32>(other_cpu_addr - cpu_addr); |
| 296 | } | 96 | } |
| 297 | 97 | ||
| 298 | /// Returns the size in bytes of the buffer | ||
| 299 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 300 | return words.size_bytes; | ||
| 301 | } | ||
| 302 | |||
| 303 | size_t getLRUID() const noexcept { | 98 | size_t getLRUID() const noexcept { |
| 304 | return lru_id; | 99 | return lru_id; |
| 305 | } | 100 | } |
| @@ -308,305 +103,16 @@ public: | |||
| 308 | lru_id = lru_id_; | 103 | lru_id = lru_id_; |
| 309 | } | 104 | } |
| 310 | 105 | ||
| 311 | private: | 106 | size_t SizeBytes() const { |
| 312 | template <Type type> | 107 | return size_bytes; |
| 313 | u64* Array() noexcept { | ||
| 314 | if constexpr (type == Type::CPU) { | ||
| 315 | return words.cpu.Pointer(IsShort()); | ||
| 316 | } else if constexpr (type == Type::GPU) { | ||
| 317 | return words.gpu.Pointer(IsShort()); | ||
| 318 | } else if constexpr (type == Type::CachedCPU) { | ||
| 319 | return words.cached_cpu.Pointer(IsShort()); | ||
| 320 | } else if constexpr (type == Type::Untracked) { | ||
| 321 | return words.untracked.Pointer(IsShort()); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | template <Type type> | ||
| 326 | const u64* Array() const noexcept { | ||
| 327 | if constexpr (type == Type::CPU) { | ||
| 328 | return words.cpu.Pointer(IsShort()); | ||
| 329 | } else if constexpr (type == Type::GPU) { | ||
| 330 | return words.gpu.Pointer(IsShort()); | ||
| 331 | } else if constexpr (type == Type::CachedCPU) { | ||
| 332 | return words.cached_cpu.Pointer(IsShort()); | ||
| 333 | } else if constexpr (type == Type::Untracked) { | ||
| 334 | return words.untracked.Pointer(IsShort()); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 338 | /** | ||
| 339 | * Change the state of a range of pages | ||
| 340 | * | ||
| 341 | * @param dirty_addr Base address to mark or unmark as modified | ||
| 342 | * @param size Size in bytes to mark or unmark as modified | ||
| 343 | */ | ||
| 344 | template <Type type, bool enable> | ||
| 345 | void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { | ||
| 346 | const s64 difference = dirty_addr - cpu_addr; | ||
| 347 | const u64 offset = std::max<s64>(difference, 0); | ||
| 348 | size += std::min<s64>(difference, 0); | ||
| 349 | if (offset >= SizeBytes() || size < 0) { | ||
| 350 | return; | ||
| 351 | } | ||
| 352 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 353 | u64* const state_words = Array<type>(); | ||
| 354 | const u64 offset_end = std::min(offset + size, SizeBytes()); | ||
| 355 | const u64 begin_page_index = offset / BYTES_PER_PAGE; | ||
| 356 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | ||
| 357 | const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE); | ||
| 358 | const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD); | ||
| 359 | u64 page_index = begin_page_index % PAGES_PER_WORD; | ||
| 360 | u64 word_index = begin_word_index; | ||
| 361 | while (word_index < end_word_index) { | ||
| 362 | const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD; | ||
| 363 | const u64 left_offset = | ||
| 364 | std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD; | ||
| 365 | const u64 right_offset = page_index; | ||
| 366 | u64 bits = ~u64{0}; | ||
| 367 | bits = (bits >> right_offset) << right_offset; | ||
| 368 | bits = (bits << left_offset) >> left_offset; | ||
| 369 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 370 | NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); | ||
| 371 | } | ||
| 372 | if constexpr (enable) { | ||
| 373 | state_words[word_index] |= bits; | ||
| 374 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 375 | untracked_words[word_index] |= bits; | ||
| 376 | } | ||
| 377 | } else { | ||
| 378 | state_words[word_index] &= ~bits; | ||
| 379 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 380 | untracked_words[word_index] &= ~bits; | ||
| 381 | } | ||
| 382 | } | ||
| 383 | page_index = 0; | ||
| 384 | ++word_index; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | /** | ||
| 389 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 390 | * | ||
| 391 | * @param word_index Index to the word to notify to the rasterizer | ||
| 392 | * @param current_bits Current state of the word | ||
| 393 | * @param new_bits New state of the word | ||
| 394 | * | ||
| 395 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 396 | */ | ||
| 397 | template <bool add_to_rasterizer> | ||
| 398 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 399 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 400 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 401 | while (changed_bits != 0) { | ||
| 402 | const int empty_bits = std::countr_zero(changed_bits); | ||
| 403 | addr += empty_bits * BYTES_PER_PAGE; | ||
| 404 | changed_bits >>= empty_bits; | ||
| 405 | |||
| 406 | const u32 continuous_bits = std::countr_one(changed_bits); | ||
| 407 | const u64 size = continuous_bits * BYTES_PER_PAGE; | ||
| 408 | const VAddr begin_addr = addr; | ||
| 409 | addr += size; | ||
| 410 | changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0; | ||
| 411 | rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1); | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | /** | ||
| 416 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||
| 417 | * needed. Call the given function on each turned off range. | ||
| 418 | * | ||
| 419 | * @param query_cpu_range Base CPU address to loop over | ||
| 420 | * @param size Size in bytes of the CPU range to loop over | ||
| 421 | * @param func Function to call for each turned off region | ||
| 422 | */ | ||
| 423 | template <Type type, typename Func> | ||
| 424 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) { | ||
| 425 | static_assert(type != Type::Untracked); | ||
| 426 | |||
| 427 | const s64 difference = query_cpu_range - cpu_addr; | ||
| 428 | const u64 query_begin = std::max<s64>(difference, 0); | ||
| 429 | size += std::min<s64>(difference, 0); | ||
| 430 | if (query_begin >= SizeBytes() || size < 0) { | ||
| 431 | return; | ||
| 432 | } | ||
| 433 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 434 | u64* const state_words = Array<type>(); | ||
| 435 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | ||
| 436 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | ||
| 437 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | ||
| 438 | |||
| 439 | const auto modified = [](u64 word) { return word != 0; }; | ||
| 440 | const auto first_modified_word = std::find_if(words_begin, words_end, modified); | ||
| 441 | if (first_modified_word == words_end) { | ||
| 442 | // Exit early when the buffer is not modified | ||
| 443 | return; | ||
| 444 | } | ||
| 445 | const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified); | ||
| 446 | |||
| 447 | const u64 word_index_begin = std::distance(state_words, first_modified_word); | ||
| 448 | const u64 word_index_end = std::distance(state_words, last_modified_word); | ||
| 449 | |||
| 450 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); | ||
| 451 | const unsigned local_page_end = | ||
| 452 | static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||
| 453 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | ||
| 454 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | ||
| 455 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | ||
| 456 | const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE); | ||
| 457 | const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin); | ||
| 458 | const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end); | ||
| 459 | const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD; | ||
| 460 | const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1; | ||
| 461 | |||
| 462 | u64 page_begin = first_word_page_begin; | ||
| 463 | u64 current_base = 0; | ||
| 464 | u64 current_size = 0; | ||
| 465 | bool on_going = false; | ||
| 466 | for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) { | ||
| 467 | const bool is_last_word = word_index + 1 == word_index_end; | ||
| 468 | const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD; | ||
| 469 | const u64 right_offset = page_begin; | ||
| 470 | const u64 left_offset = PAGES_PER_WORD - page_end; | ||
| 471 | u64 bits = ~u64{0}; | ||
| 472 | bits = (bits >> right_offset) << right_offset; | ||
| 473 | bits = (bits << left_offset) >> left_offset; | ||
| 474 | |||
| 475 | const u64 current_word = state_words[word_index] & bits; | ||
| 476 | if (clear) { | ||
| 477 | state_words[word_index] &= ~bits; | ||
| 478 | } | ||
| 479 | |||
| 480 | if constexpr (type == Type::CPU) { | ||
| 481 | const u64 current_bits = untracked_words[word_index] & bits; | ||
| 482 | untracked_words[word_index] &= ~bits; | ||
| 483 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | ||
| 484 | } | ||
| 485 | // Exclude CPU-modified pages when visiting GPU pages | ||
| 486 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||
| 487 | u64 page = page_begin; | ||
| 488 | page_begin = 0; | ||
| 489 | |||
| 490 | while (page < page_end) { | ||
| 491 | const int empty_bits = std::countr_zero(word >> page); | ||
| 492 | if (on_going && empty_bits != 0) { | ||
| 493 | InvokeModifiedRange(func, current_size, current_base); | ||
| 494 | current_size = 0; | ||
| 495 | on_going = false; | ||
| 496 | } | ||
| 497 | if (empty_bits == PAGES_PER_WORD) { | ||
| 498 | break; | ||
| 499 | } | ||
| 500 | page += empty_bits; | ||
| 501 | |||
| 502 | const int continuous_bits = std::countr_one(word >> page); | ||
| 503 | if (!on_going && continuous_bits != 0) { | ||
| 504 | current_base = word_index * PAGES_PER_WORD + page; | ||
| 505 | on_going = true; | ||
| 506 | } | ||
| 507 | current_size += continuous_bits; | ||
| 508 | page += continuous_bits; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | if (on_going && current_size > 0) { | ||
| 512 | InvokeModifiedRange(func, current_size, current_base); | ||
| 513 | } | ||
| 514 | } | ||
| 515 | |||
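The signed clamping at the top of ForEachModifiedRange is easy to miss: a query that starts before the buffer keeps only its overlapping tail, and the signed size can reject a query entirely. A sketch of just that step, with hypothetical names:

    #include <algorithm>
    #include <cstdint>

    struct ClampedQuery {
        std::uint64_t begin; // offset of the query inside the buffer
        std::int64_t size;   // remaining size after clamping
        bool valid;
    };

    ClampedQuery ClampQuery(std::int64_t query_addr, std::int64_t size,
                            std::int64_t buffer_addr, std::int64_t buffer_size) {
        const std::int64_t difference = query_addr - buffer_addr;
        const std::uint64_t begin = std::max<std::int64_t>(difference, 0);
        size += std::min<std::int64_t>(difference, 0); // drop the part before the buffer
        const bool valid = begin < static_cast<std::uint64_t>(buffer_size) && size >= 0;
        return {begin, size, valid};
    }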
| 516 | template <typename Func> | ||
| 517 | void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) { | ||
| 518 | const u64 current_size_bytes = current_size * BYTES_PER_PAGE; | ||
| 519 | const u64 offset_begin = current_base * BYTES_PER_PAGE; | ||
| 520 | const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes()); | ||
| 521 | func(offset_begin, offset_end - offset_begin); | ||
| 522 | } | 108 | } |
| 523 | 109 | ||
| 524 | /** | 110 | private: |
| 525 | * Returns true when a region has been modified | ||
| 526 | * | ||
| 527 | * @param offset Offset in bytes from the start of the buffer | ||
| 528 | * @param size Size in bytes of the region to query for modifications | ||
| 529 | */ | ||
| 530 | template <Type type> | ||
| 531 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 532 | static_assert(type != Type::Untracked); | ||
| 533 | |||
| 534 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 535 | const u64* const state_words = Array<type>(); | ||
| 536 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 537 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 538 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 539 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 540 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | ||
| 541 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | ||
| 542 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 543 | const u64 word = state_words[word_index] & ~off_word; | ||
| 544 | if (word == 0) { | ||
| 545 | continue; | ||
| 546 | } | ||
| 547 | const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit); | ||
| 548 | const u64 local_page_end = page_end % PAGES_PER_WORD; | ||
| 549 | const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD; | ||
| 550 | if (((word >> page_index) << page_index) << page_end_shift != 0) { | ||
| 551 | return true; | ||
| 552 | } | ||
| 553 | } | ||
| 554 | return false; | ||
| 555 | } | ||
| 556 | |||
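The predicate above reduces to "is any bit set in [page_index, local_page_end) of this word". In isolation, assuming 64 pages per word:

    #include <cstdint>

    // Requires begin < 64 and 1 <= end <= 64.
    constexpr bool AnyBitInRange(std::uint64_t word, unsigned begin, unsigned end) {
        const std::uint64_t low_cleared = (word >> begin) << begin; // drop bits below begin
        const unsigned end_shift = 64 - end;                        // drop bits at or above end
        return (low_cleared << end_shift) != 0;
    }
    static_assert(AnyBitInRange(0b0100, 2, 3));
    static_assert(!AnyBitInRange(0b0100, 3, 64));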
| 557 | /** | ||
| 558 | * Returns a begin-end pair spanning the inclusive modified region | ||
| 559 | * | ||
| 560 | * @param offset Offset in bytes from the start of the buffer | ||
| 561 | * @param size Size in bytes of the region to query for modifications | ||
| 562 | */ | ||
| 563 | template <Type type> | ||
| 564 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 565 | static_assert(type != Type::Untracked); | ||
| 566 | |||
| 567 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 568 | const u64* const state_words = Array<type>(); | ||
| 569 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 570 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 571 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 572 | const u64 page_base = offset / BYTES_PER_PAGE; | ||
| 573 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 574 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 575 | u64 end = 0; | ||
| 576 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | ||
| 577 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 578 | const u64 word = state_words[word_index] & ~off_word; | ||
| 579 | if (word == 0) { | ||
| 580 | continue; | ||
| 581 | } | ||
| 582 | const u64 local_page_begin = std::countr_zero(word); | ||
| 583 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 584 | const u64 page_index = word_index * PAGES_PER_WORD; | ||
| 585 | const u64 page_begin = std::max(page_index + local_page_begin, page_base); | ||
| 586 | const u64 page_end = std::min(page_index + local_page_end, page_limit); | ||
| 587 | begin = std::min(begin, page_begin); | ||
| 588 | end = std::max(end, page_end); | ||
| 589 | } | ||
| 590 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 591 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 592 | } | ||
| 593 | |||
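Per word, the span of modified pages comes straight from the bit-scan intrinsics; a minimal sketch:

    #include <bit>
    #include <cstdint>
    #include <utility>

    // Returns {first_set_page, one_past_last_set_page}; word must be nonzero.
    constexpr std::pair<unsigned, unsigned> PageSpan(std::uint64_t word) {
        const unsigned begin = static_cast<unsigned>(std::countr_zero(word));
        const unsigned end = 64u - static_cast<unsigned>(std::countl_zero(word));
        return {begin, end};
    }
    static_assert(PageSpan(0b0110).first == 1 && PageSpan(0b0110).second == 3);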
| 594 | /// Returns the number of state words in the buffer | ||
| 595 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 596 | return words.NumWords(); | ||
| 597 | } | ||
| 598 | |||
| 599 | /// Returns true when the buffer fits in the small vector optimization | ||
| 600 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 601 | return words.IsShort(); | ||
| 602 | } | ||
| 603 | |||
| 604 | RasterizerInterface* rasterizer = nullptr; | ||
| 605 | VAddr cpu_addr = 0; | 111 | VAddr cpu_addr = 0; |
| 606 | Words words; | ||
| 607 | BufferFlagBits flags{}; | 112 | BufferFlagBits flags{}; |
| 608 | int stream_score = 0; | 113 | int stream_score = 0; |
| 609 | size_t lru_id = SIZE_MAX; | 114 | size_t lru_id = SIZE_MAX; |
| 115 | size_t size_bytes = 0; | ||
| 610 | }; | 116 | }; |
| 611 | 117 | ||
| 612 | } // namespace VideoCommon | 118 | } // namespace VideoCommon |
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index a16308b60..40db243d2 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/microprofile.h" | 4 | #include "common/microprofile.h" |
| 5 | 5 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index abdc593df..7975564b5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1,485 +1,29 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <array> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <mutex> | ||
| 10 | #include <numeric> | 8 | #include <numeric> |
| 11 | #include <span> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "common/div_ceil.h" | ||
| 19 | #include "common/literals.h" | ||
| 20 | #include "common/lru_cache.h" | ||
| 21 | #include "common/microprofile.h" | ||
| 22 | #include "common/polyfill_ranges.h" | ||
| 23 | #include "common/scratch_buffer.h" | ||
| 24 | #include "common/settings.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 27 | #include "video_core/control/channel_state_cache.h" | ||
| 28 | #include "video_core/delayed_destruction_ring.h" | ||
| 29 | #include "video_core/dirty_flags.h" | ||
| 30 | #include "video_core/engines/draw_manager.h" | ||
| 31 | #include "video_core/engines/kepler_compute.h" | ||
| 32 | #include "video_core/engines/maxwell_3d.h" | ||
| 33 | #include "video_core/memory_manager.h" | ||
| 34 | #include "video_core/rasterizer_interface.h" | ||
| 35 | #include "video_core/surface.h" | ||
| 36 | #include "video_core/texture_cache/slot_vector.h" | ||
| 37 | #include "video_core/texture_cache/types.h" | ||
| 38 | 9 | ||
| 39 | namespace VideoCommon { | 10 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 40 | |||
| 41 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 42 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 43 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 44 | |||
| 45 | using BufferId = SlotId; | ||
| 46 | |||
| 47 | using VideoCore::Surface::PixelFormat; | ||
| 48 | using namespace Common::Literals; | ||
| 49 | |||
| 50 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 51 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 52 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 53 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 54 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 55 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 56 | constexpr u32 NUM_STAGES = 5; | ||
| 57 | |||
| 58 | enum class ObtainBufferSynchronize : u32 { | ||
| 59 | NoSynchronize = 0, | ||
| 60 | FullSynchronize = 1, | ||
| 61 | SynchronizeNoDirty = 2, | ||
| 62 | }; | ||
| 63 | |||
| 64 | enum class ObtainBufferOperation : u32 { | ||
| 65 | DoNothing = 0, | ||
| 66 | MarkAsWritten = 1, | ||
| 67 | DiscardWrite = 2, | ||
| 68 | MarkQuery = 3, | ||
| 69 | }; | ||
| 70 | |||
| 71 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 72 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 73 | |||
| 74 | template <typename P> | ||
| 75 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 76 | |||
| 77 | // Page size for caching purposes. | ||
| 78 | // This is unrelated to the CPU page size and can be changed to whatever proves optimal. | ||
| 79 | static constexpr u32 YUZU_PAGEBITS = 16; | ||
| 80 | static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS; | ||
| 81 | |||
| 82 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 83 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 84 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 85 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 86 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 87 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 88 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 89 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 90 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 91 | |||
| 92 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 93 | |||
| 94 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 95 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 96 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 97 | |||
| 98 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 99 | |||
| 100 | using Runtime = typename P::Runtime; | ||
| 101 | using Buffer = typename P::Buffer; | ||
| 102 | |||
| 103 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 104 | using IntervalType = typename IntervalSet::interval_type; | ||
| 105 | |||
| 106 | struct Empty {}; | ||
| 107 | |||
| 108 | struct OverlapResult { | ||
| 109 | std::vector<BufferId> ids; | ||
| 110 | VAddr begin; | ||
| 111 | VAddr end; | ||
| 112 | bool has_stream_leap = false; | ||
| 113 | }; | ||
| 114 | |||
| 115 | struct Binding { | ||
| 116 | VAddr cpu_addr{}; | ||
| 117 | u32 size{}; | ||
| 118 | BufferId buffer_id; | ||
| 119 | }; | ||
| 120 | |||
| 121 | struct TextureBufferBinding : Binding { | ||
| 122 | PixelFormat format; | ||
| 123 | }; | ||
| 124 | |||
| 125 | static constexpr Binding NULL_BINDING{ | ||
| 126 | .cpu_addr = 0, | ||
| 127 | .size = 0, | ||
| 128 | .buffer_id = NULL_BUFFER_ID, | ||
| 129 | }; | ||
| 130 | |||
| 131 | public: | ||
| 132 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 133 | |||
| 134 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 135 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 136 | |||
| 137 | void TickFrame(); | ||
| 138 | |||
| 139 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 140 | |||
| 141 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 142 | |||
| 143 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 144 | |||
| 145 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 146 | |||
| 147 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 148 | |||
| 149 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 150 | |||
| 151 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 152 | |||
| 153 | void UpdateComputeBuffers(); | ||
| 154 | |||
| 155 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 156 | |||
| 157 | void BindHostStageBuffers(size_t stage); | ||
| 158 | |||
| 159 | void BindHostComputeBuffers(); | ||
| 160 | |||
| 161 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 162 | const UniformBufferSizes* sizes); | ||
| 163 | |||
| 164 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 165 | |||
| 166 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 167 | |||
| 168 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 169 | bool is_written); | ||
| 170 | |||
| 171 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 172 | |||
| 173 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 174 | PixelFormat format, bool is_written, bool is_image); | ||
| 175 | |||
| 176 | void UnbindComputeStorageBuffers(); | ||
| 177 | |||
| 178 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 179 | bool is_written); | ||
| 180 | |||
| 181 | void UnbindComputeTextureBuffers(); | ||
| 182 | |||
| 183 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 184 | bool is_written, bool is_image); | ||
| 185 | |||
| 186 | void FlushCachedWrites(); | ||
| 187 | |||
| 188 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 189 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 190 | |||
| 191 | void AccumulateFlushes(); | ||
| 192 | |||
| 193 | /// Return true when the caller should wait for async downloads | ||
| 194 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 195 | |||
| 196 | /// Commit asynchronous downloads | ||
| 197 | void CommitAsyncFlushes(); | ||
| 198 | void CommitAsyncFlushesHigh(); | ||
| 199 | |||
| 200 | /// Pop asynchronous downloads | ||
| 201 | void PopAsyncFlushes(); | ||
| 202 | |||
| 203 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 204 | |||
| 205 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 206 | |||
| 207 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 208 | ObtainBufferSynchronize sync_info, | ||
| 209 | ObtainBufferOperation post_op); | ||
| 210 | |||
| 211 | /// Return true when a CPU region is modified by the GPU | ||
| 212 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 213 | |||
| 214 | /// Return true when a region is registered in the cache | ||
| 215 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 216 | |||
| 217 | /// Return true when a CPU region is modified by the CPU | ||
| 218 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 219 | |||
| 220 | void SetDrawIndirect( | ||
| 221 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 222 | current_draw_indirect = current_draw_indirect_; | ||
| 223 | } | ||
| 224 | |||
| 225 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 226 | |||
| 227 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 228 | |||
| 229 | std::recursive_mutex mutex; | ||
| 230 | Runtime& runtime; | ||
| 231 | |||
| 232 | private: | ||
| 233 | template <typename Func> | ||
| 234 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 235 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 236 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 237 | index += disabled_bits; | ||
| 238 | enabled_mask >>= disabled_bits; | ||
| 239 | func(index); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
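ForEachEnabledBit advances past disabled bits inside the for-header; an equivalent, slightly more conventional formulation of the same set-bit walk:

    #include <bit>
    #include <cstdint>

    // Visit each set bit of a mask exactly once, in ascending order.
    template <typename Func>
    void ForEachSetBit(std::uint32_t mask, Func&& func) {
        while (mask != 0) {
            const int index = std::countr_zero(mask);
            func(index);
            mask &= mask - 1; // clear the lowest set bit
        }
    }
    // Usage: ForEachSetBit(0b1010u, [](int i) { /* i == 1, then i == 3 */ });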
| 243 | template <typename Func> | ||
| 244 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 245 | const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE); | ||
| 246 | for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 247 | const BufferId buffer_id = page_table[page]; | ||
| 248 | if (!buffer_id) { | ||
| 249 | ++page; | ||
| 250 | continue; | ||
| 251 | } | ||
| 252 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 253 | func(buffer_id, buffer); | ||
| 254 | |||
| 255 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 256 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
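The key property of the walk above is the jump past a found buffer, so every buffer overlapping the range is visited exactly once. A stripped-down model with hypothetical flat containers:

    #include <cstdint>
    #include <vector>

    struct FakeBuffer {
        std::uint64_t cpu_addr;
        std::uint64_t size_bytes;
    };

    constexpr std::uint64_t PAGE_BITS = 16; // mirrors YUZU_PAGEBITS
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

    constexpr std::uint64_t DivCeil(std::uint64_t n, std::uint64_t d) {
        return (n + d - 1) / d;
    }

    template <typename Func>
    void VisitBuffersInRange(const std::vector<std::uint32_t>& page_table,
                             const std::vector<FakeBuffer>& buffers, std::uint64_t addr,
                             std::uint64_t size, Func&& func) {
        const std::uint64_t page_end = DivCeil(addr + size, PAGE_SIZE);
        for (std::uint64_t page = addr >> PAGE_BITS; page < page_end;) {
            const std::uint32_t buffer_id = page_table[page];
            if (buffer_id == 0) { // id 0 stands in for the null buffer
                ++page;           // empty page: advance one step
                continue;
            }
            const FakeBuffer& buffer = buffers[buffer_id];
            func(buffer_id, buffer);
            // Jump directly past the buffer's last page.
            page = DivCeil(buffer.cpu_addr + buffer.size_bytes, PAGE_SIZE);
        }
    }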
| 260 | template <typename Func> | ||
| 261 | void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 262 | const VAddr start_address = cpu_addr; | ||
| 263 | const VAddr end_address = start_address + size; | ||
| 264 | const VAddr search_base = | ||
| 265 | static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size))); | ||
| 266 | const IntervalType search_interval{search_base, search_base + 1}; | ||
| 267 | auto it = common_ranges.lower_bound(search_interval); | ||
| 268 | if (it == common_ranges.end()) { | ||
| 269 | it = common_ranges.begin(); | ||
| 270 | } | ||
| 271 | for (; it != common_ranges.end(); it++) { | ||
| 272 | VAddr inter_addr_end = it->upper(); | ||
| 273 | VAddr inter_addr = it->lower(); | ||
| 274 | if (inter_addr >= end_address) { | ||
| 275 | break; | ||
| 276 | } | ||
| 277 | if (inter_addr_end <= start_address) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | if (inter_addr_end > end_address) { | ||
| 281 | inter_addr_end = end_address; | ||
| 282 | } | ||
| 283 | if (inter_addr < start_address) { | ||
| 284 | inter_addr = start_address; | ||
| 285 | } | ||
| 286 | func(inter_addr, inter_addr_end); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
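The loop above clips each stored interval to the queried window before invoking the callback; the clipping alone is:

    #include <algorithm>
    #include <cstdint>
    #include <optional>
    #include <utility>

    using Interval = std::pair<std::uint64_t, std::uint64_t>; // [lower, upper)

    std::optional<Interval> ClipToWindow(Interval i, std::uint64_t begin, std::uint64_t end) {
        const std::uint64_t lower = std::max(i.first, begin);
        const std::uint64_t upper = std::min(i.second, end);
        if (lower >= upper) {
            return std::nullopt; // no overlap with the window
        }
        return Interval{lower, upper};
    }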
| 290 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 291 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 292 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 293 | } | ||
| 294 | |||
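IsRangeGranular asks whether [cpu_addr, cpu_addr + size] stays within a single host page; masking both endpoints and comparing is equivalent to comparing page indices. With an assumed 4 KiB page for illustration:

    #include <cstdint>

    constexpr bool SamePage(std::uint64_t addr, std::uint64_t size) {
        constexpr std::uint64_t PAGE_MASK = 0xFFFULL; // 4 KiB pages assumed
        return (addr & ~PAGE_MASK) == ((addr + size) & ~PAGE_MASK);
    }
    static_assert(SamePage(0x1000, 0xFFF));
    static_assert(!SamePage(0x1000, 0x1000));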
| 295 | void RunGarbageCollector(); | ||
| 296 | |||
| 297 | void BindHostIndexBuffer(); | ||
| 298 | |||
| 299 | void BindHostVertexBuffers(); | ||
| 300 | |||
| 301 | void BindHostDrawIndirectBuffers(); | ||
| 302 | |||
| 303 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 304 | |||
| 305 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 306 | |||
| 307 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 308 | |||
| 309 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 310 | |||
| 311 | void BindHostTransformFeedbackBuffers(); | ||
| 312 | |||
| 313 | void BindHostComputeUniformBuffers(); | ||
| 314 | |||
| 315 | void BindHostComputeStorageBuffers(); | ||
| 316 | |||
| 317 | void BindHostComputeTextureBuffers(); | ||
| 318 | |||
| 319 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 320 | |||
| 321 | void DoUpdateComputeBuffers(); | ||
| 322 | |||
| 323 | void UpdateIndexBuffer(); | ||
| 324 | |||
| 325 | void UpdateVertexBuffers(); | ||
| 326 | |||
| 327 | void UpdateVertexBuffer(u32 index); | ||
| 328 | |||
| 329 | void UpdateDrawIndirect(); | ||
| 330 | |||
| 331 | void UpdateUniformBuffers(size_t stage); | ||
| 332 | |||
| 333 | void UpdateStorageBuffers(size_t stage); | ||
| 334 | |||
| 335 | void UpdateTextureBuffers(size_t stage); | ||
| 336 | |||
| 337 | void UpdateTransformFeedbackBuffers(); | ||
| 338 | |||
| 339 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 340 | |||
| 341 | void UpdateComputeUniformBuffers(); | ||
| 342 | |||
| 343 | void UpdateComputeStorageBuffers(); | ||
| 344 | |||
| 345 | void UpdateComputeTextureBuffers(); | ||
| 346 | |||
| 347 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 348 | |||
| 349 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 350 | |||
| 351 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 352 | |||
| 353 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 354 | |||
| 355 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 356 | |||
| 357 | void Register(BufferId buffer_id); | ||
| 358 | |||
| 359 | void Unregister(BufferId buffer_id); | ||
| 360 | |||
| 361 | template <bool insert> | ||
| 362 | void ChangeRegister(BufferId buffer_id); | ||
| 363 | |||
| 364 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 365 | |||
| 366 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 367 | |||
| 368 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 369 | |||
| 370 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 371 | std::span<BufferCopy> copies); | ||
| 372 | |||
| 373 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 374 | std::span<const BufferCopy> copies); | ||
| 375 | |||
| 376 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 377 | |||
| 378 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 379 | |||
| 380 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 381 | |||
| 382 | void DeleteBuffer(BufferId buffer_id); | ||
| 383 | |||
| 384 | void NotifyBufferDeletion(); | ||
| 385 | |||
| 386 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 387 | bool is_written = false) const; | ||
| 388 | |||
| 389 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 390 | PixelFormat format); | ||
| 391 | |||
| 392 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 393 | |||
| 394 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 395 | |||
| 396 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 397 | |||
| 398 | void ClearDownload(IntervalType subtract_interval); | ||
| 399 | |||
| 400 | VideoCore::RasterizerInterface& rasterizer; | ||
| 401 | Core::Memory::Memory& cpu_memory; | ||
| 402 | |||
| 403 | SlotVector<Buffer> slot_buffers; | ||
| 404 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 405 | |||
| 406 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 407 | |||
| 408 | u32 last_index_count = 0; | ||
| 409 | |||
| 410 | Binding index_buffer; | ||
| 411 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 412 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 413 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 414 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 415 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 416 | Binding count_buffer_binding; | ||
| 417 | Binding indirect_buffer_binding; | ||
| 418 | |||
| 419 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 420 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 421 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 422 | |||
| 423 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 424 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 425 | |||
| 426 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 427 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 428 | |||
| 429 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 430 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 431 | u32 enabled_compute_storage_buffers = 0; | ||
| 432 | u32 written_compute_storage_buffers = 0; | ||
| 433 | |||
| 434 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 435 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 436 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 437 | u32 enabled_compute_texture_buffers = 0; | ||
| 438 | u32 written_compute_texture_buffers = 0; | ||
| 439 | u32 image_compute_texture_buffers = 0; | ||
| 440 | |||
| 441 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 442 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 443 | |||
| 444 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 445 | |||
| 446 | bool has_deleted_buffers = false; | ||
| 447 | 11 | ||
| 448 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | 12 | namespace VideoCommon { |
| 449 | dirty_uniform_buffers{}; | ||
| 450 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 451 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 452 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 453 | uniform_buffer_binding_sizes{}; | ||
| 454 | |||
| 455 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 456 | |||
| 457 | IntervalSet uncommitted_ranges; | ||
| 458 | IntervalSet common_ranges; | ||
| 459 | std::deque<IntervalSet> committed_ranges; | ||
| 460 | |||
| 461 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 462 | |||
| 463 | struct LRUItemParams { | ||
| 464 | using ObjectType = BufferId; | ||
| 465 | using TickType = u64; | ||
| 466 | }; | ||
| 467 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 468 | u64 frame_tick = 0; | ||
| 469 | u64 total_used_memory = 0; | ||
| 470 | u64 minimum_memory = 0; | ||
| 471 | u64 critical_memory = 0; | ||
| 472 | 13 | ||
| 473 | std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; | 14 | using Core::Memory::YUZU_PAGESIZE; |
| 474 | }; | ||
| 475 | 15 | ||
| 476 | template <class P> | 16 | template <class P> |
| 477 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 17 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 478 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) | 18 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) |
| 479 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { | 19 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{ |
| 20 | rasterizer} { | ||
| 480 | // Ensure the first slot is used for the null buffer | 21 | // Ensure the first slot is used for the null buffer |
| 481 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 482 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| 24 | inline_buffer_id = NULL_BUFFER_ID; | ||
| 25 | |||
| 26 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 483 | 27 | ||
| 484 | if (!runtime.CanReportMemoryUsage()) { | 28 | if (!runtime.CanReportMemoryUsage()) { |
| 485 | minimum_memory = DEFAULT_EXPECTED_MEMORY; | 29 | minimum_memory = DEFAULT_EXPECTED_MEMORY; |
| @@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() { | |||
| 531 | uniform_cache_hits[0] = 0; | 75 | uniform_cache_hits[0] = 0; |
| 532 | uniform_cache_shots[0] = 0; | 76 | uniform_cache_shots[0] = 0; |
| 533 | 77 | ||
| 78 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 79 | |||
| 534 | const bool skip_preferred = hits * 256 < shots * 251; | 80 | const bool skip_preferred = hits * 256 < shots * 251; |
| 535 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 81 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 536 | 82 | ||
| @@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() { | |||
| 543 | } | 89 | } |
| 544 | ++frame_tick; | 90 | ++frame_tick; |
| 545 | delayed_destruction_ring.Tick(); | 91 | delayed_destruction_ring.Tick(); |
| 92 | |||
| 93 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 94 | for (auto& buffer : async_buffers_death_ring) { | ||
| 95 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 96 | } | ||
| 97 | async_buffers_death_ring.clear(); | ||
| 98 | } | ||
| 546 | } | 99 | } |
| 547 | 100 | ||
| 548 | template <class P> | 101 | template <class P> |
| 549 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | 102 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { |
| 550 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 103 | memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); |
| 551 | buffer.MarkRegionAsCpuModified(cpu_addr, size); | 104 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { |
| 552 | }); | 105 | const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; |
| 106 | ClearDownload(subtract_interval); | ||
| 107 | common_ranges.subtract(subtract_interval); | ||
| 108 | } | ||
| 553 | } | 109 | } |
| 554 | 110 | ||
| 555 | template <class P> | 111 | template <class P> |
| 556 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 112 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { |
| 557 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 113 | memory_tracker.CachedCpuWrite(cpu_addr, size); |
| 558 | if (!buffer.HasCachedWrites()) { | 114 | const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE), |
| 559 | cached_write_buffer_ids.push_back(buffer_id); | 115 | Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)}; |
| 560 | } | 116 | cached_ranges.add(add_interval); |
| 561 | buffer.CachedCpuWrite(cpu_addr, size); | ||
| 562 | }); | ||
| 563 | } | 117 | } |
| 564 | 118 | ||
| 565 | template <class P> | 119 | template <class P> |
| 566 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 120 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 121 | WaitOnAsyncFlushes(cpu_addr, size); | ||
| 567 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 122 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { |
| 568 | DownloadBufferMemory(buffer, cpu_addr, size); | 123 | DownloadBufferMemory(buffer, cpu_addr, size); |
| 569 | }); | 124 | }); |
| 570 | } | 125 | } |
| 571 | 126 | ||
| 572 | template <class P> | 127 | template <class P> |
| 128 | void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) { | ||
| 129 | bool must_wait = false; | ||
| 130 | ForEachInOverlapCounter(async_downloads, cpu_addr, size, | ||
| 131 | [&](VAddr, VAddr, int) { must_wait = true; }); | ||
| 132 | bool must_release = false; | ||
| 133 | ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; }); | ||
| 134 | if (must_release) { | ||
| 135 | std::function<void()> tmp([]() {}); | ||
| 136 | rasterizer.SignalFence(std::move(tmp)); | ||
| 137 | } | ||
| 138 | if (must_wait || must_release) { | ||
| 139 | rasterizer.ReleaseFences(); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | template <class P> | ||
| 573 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | 144 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { |
| 145 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024); | ||
| 574 | uncommitted_ranges.subtract(subtract_interval); | 146 | uncommitted_ranges.subtract(subtract_interval); |
| 147 | pending_ranges.subtract(subtract_interval); | ||
| 575 | for (auto& interval_set : committed_ranges) { | 148 | for (auto& interval_set : committed_ranges) { |
| 576 | interval_set.subtract(subtract_interval); | 149 | interval_set.subtract(subtract_interval); |
| 577 | } | 150 | } |
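The range bookkeeping above leans on boost::icl interval sets, where subtracting a range splits any stored interval that straddles it. A minimal demonstration using the same construction style as this file:

    #include <boost/icl/interval_set.hpp>
    #include <cstdint>

    void IntervalSubtractDemo() {
        using IntervalSet = boost::icl::interval_set<std::uint64_t>;
        using IntervalType = IntervalSet::interval_type;
        IntervalSet ranges;
        ranges.add(IntervalType{0x0000, 0x4000});      // one range, [0x0000, 0x4000)
        ranges.subtract(IntervalType{0x1000, 0x2000}); // punch a hole in the middle
        // ranges now holds [0x0000, 0x1000) and [0x2000, 0x4000).
    }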
| @@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 591 | } | 164 | } |
| 592 | 165 | ||
| 593 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | 166 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |
| 167 | WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount)); | ||
| 594 | ClearDownload(subtract_interval); | 168 | ClearDownload(subtract_interval); |
| 595 | 169 | ||
| 596 | BufferId buffer_a; | 170 | BufferId buffer_a; |
| @@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 616 | const VAddr diff = base_address - *cpu_src_address; | 190 | const VAddr diff = base_address - *cpu_src_address; |
| 617 | const VAddr new_base_address = *cpu_dest_address + diff; | 191 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 618 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 192 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 619 | uncommitted_ranges.add(add_interval); | ||
| 620 | tmp_intervals.push_back(add_interval); | 193 | tmp_intervals.push_back(add_interval); |
| 194 | uncommitted_ranges.add(add_interval); | ||
| 195 | pending_ranges.add(add_interval); | ||
| 621 | }; | 196 | }; |
| 622 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 197 | ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); |
| 623 | // Performing this subtraction in this order is important for overlapping copies. | 198 | // Performing this subtraction in this order is important for overlapping copies. |
| 624 | common_ranges.subtract(subtract_interval); | 199 | common_ranges.subtract(subtract_interval); |
| 625 | const bool has_new_downloads = tmp_intervals.size() != 0; | 200 | const bool has_new_downloads = tmp_intervals.size() != 0; |
| @@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 628 | } | 203 | } |
| 629 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 204 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 630 | if (has_new_downloads) { | 205 | if (has_new_downloads) { |
| 631 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 206 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 632 | } | 207 | } |
| 633 | std::vector<u8> tmp_buffer(amount); | 208 | std::vector<u8> tmp_buffer(amount); |
| 634 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 209 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |
| @@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 866 | 441 | ||
| 867 | template <class P> | 442 | template <class P> |
| 868 | void BufferCache<P>::FlushCachedWrites() { | 443 | void BufferCache<P>::FlushCachedWrites() { |
| 869 | for (const BufferId buffer_id : cached_write_buffer_ids) { | ||
| 870 | slot_buffers[buffer_id].FlushCachedWrites(); | ||
| 871 | } | ||
| 872 | cached_write_buffer_ids.clear(); | 444 | cached_write_buffer_ids.clear(); |
| 445 | memory_tracker.FlushCachedWrites(); | ||
| 446 | cached_ranges.clear(); | ||
| 873 | } | 447 | } |
| 874 | 448 | ||
| 875 | template <class P> | 449 | template <class P> |
| @@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 879 | 453 | ||
| 880 | template <class P> | 454 | template <class P> |
| 881 | void BufferCache<P>::AccumulateFlushes() { | 455 | void BufferCache<P>::AccumulateFlushes() { |
| 882 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 883 | uncommitted_ranges.clear(); | ||
| 884 | return; | ||
| 885 | } | ||
| 886 | if (uncommitted_ranges.empty()) { | 456 | if (uncommitted_ranges.empty()) { |
| 887 | return; | 457 | return; |
| 888 | } | 458 | } |
| @@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() { | |||
| 891 | 461 | ||
| 892 | template <class P> | 462 | template <class P> |
| 893 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | 463 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 894 | return false; | 464 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 465 | return (!async_buffers.empty() && async_buffers.front().has_value()); | ||
| 466 | } else { | ||
| 467 | return false; | ||
| 468 | } | ||
| 895 | } | 469 | } |
| 896 | 470 | ||
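The async download queue is consumed strictly in order, and an empty optional marks a commit that produced no downloads. A stripped-down model of the front-of-queue logic, with a placeholder staging type:

    #include <deque>
    #include <optional>

    struct StagingBuffer {}; // placeholder for the runtime's staging buffer

    std::deque<std::optional<StagingBuffer>> async_buffers;

    bool ShouldWait() {
        // Only a real download at the front requires waiting.
        return !async_buffers.empty() && async_buffers.front().has_value();
    }

    void Pop() {
        if (async_buffers.empty()) {
            return;
        }
        if (!async_buffers.front().has_value()) {
            async_buffers.pop_front(); // placeholder entry: nothing to copy back
            return;
        }
        // ... copy the mapped staging data back to guest memory, then:
        async_buffers.pop_front();
    }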
| 897 | template <class P> | 471 | template <class P> |
| @@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 899 | AccumulateFlushes(); | 473 | AccumulateFlushes(); |
| 900 | 474 | ||
| 901 | if (committed_ranges.empty()) { | 475 | if (committed_ranges.empty()) { |
| 476 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 477 | if (active_async_buffers) { | ||
| 478 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 479 | } | ||
| 480 | } | ||
| 902 | return; | 481 | return; |
| 903 | } | 482 | } |
| 904 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 483 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 905 | const bool is_accuracy_normal = | ||
| 906 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 907 | 484 | ||
| 485 | pending_ranges.clear(); | ||
| 908 | auto it = committed_ranges.begin(); | 486 | auto it = committed_ranges.begin(); |
| 909 | while (it != committed_ranges.end()) { | 487 | while (it != committed_ranges.end()) { |
| 910 | auto& current_intervals = *it; | 488 | auto& current_intervals = *it; |
| @@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 926 | const std::size_t size = interval.upper() - interval.lower(); | 504 | const std::size_t size = interval.upper() - interval.lower(); |
| 927 | const VAddr cpu_addr = interval.lower(); | 505 | const VAddr cpu_addr = interval.lower(); |
| 928 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 506 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 929 | buffer.ForEachDownloadRangeAndClear( | 507 | const VAddr buffer_start = buffer.CpuAddr(); |
| 930 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 508 | const VAddr buffer_end = buffer_start + buffer.SizeBytes(); |
| 931 | if (is_accuracy_normal) { | 509 | const VAddr new_start = std::max(buffer_start, cpu_addr); |
| 932 | return; | 510 | const VAddr new_end = std::min(buffer_end, cpu_addr + size); |
| 933 | } | 511 | memory_tracker.ForEachDownloadRange( |
| 512 | new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 934 | const VAddr buffer_addr = buffer.CpuAddr(); | 513 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 935 | const auto add_download = [&](VAddr start, VAddr end) { | 514 | const auto add_download = [&](VAddr start, VAddr end) { |
| 936 | const u64 new_offset = start - buffer_addr; | 515 | const u64 new_offset = start - buffer_addr; |
| @@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 944 | buffer_id, | 523 | buffer_id, |
| 945 | }); | 524 | }); |
| 946 | // Align up to avoid cache conflicts | 525 | // Align up to avoid cache conflicts |
| 947 | constexpr u64 align = 8ULL; | 526 | constexpr u64 align = 64ULL; |
| 948 | constexpr u64 mask = ~(align - 1ULL); | 527 | constexpr u64 mask = ~(align - 1ULL); |
| 949 | total_size_bytes += (new_size + align - 1) & mask; | 528 | total_size_bytes += (new_size + align - 1) & mask; |
| 950 | largest_copy = std::max(largest_copy, new_size); | 529 | largest_copy = std::max(largest_copy, new_size); |
| 951 | }; | 530 | }; |
| 952 | 531 | ||
| 953 | const VAddr start_address = buffer_addr + range_offset; | 532 | ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); |
| 954 | const VAddr end_address = start_address + range_size; | ||
| 955 | ForEachWrittenRange(start_address, range_size, add_download); | ||
| 956 | const IntervalType subtract_interval{start_address, end_address}; | ||
| 957 | common_ranges.subtract(subtract_interval); | ||
| 958 | }); | 533 | }); |
| 959 | }); | 534 | }); |
| 960 | } | 535 | } |
| 961 | } | 536 | } |
| 962 | committed_ranges.clear(); | 537 | committed_ranges.clear(); |
| 963 | if (downloads.empty()) { | 538 | if (downloads.empty()) { |
| 539 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 540 | if (active_async_buffers) { | ||
| 541 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 542 | } | ||
| 543 | } | ||
| 964 | return; | 544 | return; |
| 965 | } | 545 | } |
| 966 | if constexpr (USE_MEMORY_MAPS) { | 546 | if (active_async_buffers) { |
| 967 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 547 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 968 | runtime.PreCopyBarrier(); | 548 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 969 | for (auto& [copy, buffer_id] : downloads) { | 549 | boost::container::small_vector<BufferCopy, 4> normalized_copies; |
| 970 | // Have in mind the staging buffer offset for the copy | 550 | IntervalSet new_async_range{}; |
| 971 | copy.dst_offset += download_staging.offset; | 551 | runtime.PreCopyBarrier(); |
| 972 | const std::array copies{copy}; | 552 | for (auto& [copy, buffer_id] : downloads) { |
| 973 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | 553 | copy.dst_offset += download_staging.offset; |
| 974 | } | 554 | const std::array copies{copy}; |
| 975 | runtime.PostCopyBarrier(); | 555 | BufferCopy second_copy{copy}; |
| 976 | runtime.Finish(); | 556 | Buffer& buffer = slot_buffers[buffer_id]; |
| 977 | for (const auto& [copy, buffer_id] : downloads) { | 557 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 978 | const Buffer& buffer = slot_buffers[buffer_id]; | 558 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); |
| 979 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 559 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; |
| 980 | // Undo the modified offset | 560 | async_downloads += std::make_pair(base_interval, 1); |
| 981 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 561 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| 982 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 562 | normalized_copies.push_back(second_copy); |
| 983 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 563 | } |
| 564 | runtime.PostCopyBarrier(); | ||
| 565 | pending_downloads.emplace_back(std::move(normalized_copies)); | ||
| 566 | async_buffers.emplace_back(download_staging); | ||
| 567 | } else { | ||
| 568 | committed_ranges.clear(); | ||
| 569 | uncommitted_ranges.clear(); | ||
| 984 | } | 570 | } |
| 985 | } else { | 571 | } else { |
| 986 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 572 | if constexpr (USE_MEMORY_MAPS) { |
| 987 | for (const auto& [copy, buffer_id] : downloads) { | 573 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 988 | Buffer& buffer = slot_buffers[buffer_id]; | 574 | runtime.PreCopyBarrier(); |
| 989 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 575 | for (auto& [copy, buffer_id] : downloads) { |
| 990 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 576 | // Have in mind the staging buffer offset for the copy |
| 991 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 577 | copy.dst_offset += download_staging.offset; |
| 578 | const std::array copies{copy}; | ||
| 579 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | ||
| 580 | } | ||
| 581 | runtime.PostCopyBarrier(); | ||
| 582 | runtime.Finish(); | ||
| 583 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 584 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 585 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 586 | // Undo the modified offset | ||
| 587 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 588 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 589 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 593 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 594 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 595 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 596 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 597 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 598 | } | ||
| 992 | } | 599 | } |
| 993 | } | 600 | } |
| 994 | } | 601 | } |
| 995 | 602 | ||
| 996 | template <class P> | 603 | template <class P> |
| 997 | void BufferCache<P>::CommitAsyncFlushes() { | 604 | void BufferCache<P>::CommitAsyncFlushes() { |
| 998 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | 605 | CommitAsyncFlushesHigh(); |
| 999 | CommitAsyncFlushesHigh(); | ||
| 1000 | } else { | ||
| 1001 | uncommitted_ranges.clear(); | ||
| 1002 | committed_ranges.clear(); | ||
| 1003 | } | ||
| 1004 | } | 606 | } |
| 1005 | 607 | ||
| 1006 | template <class P> | 608 | template <class P> |
| 1007 | void BufferCache<P>::PopAsyncFlushes() {} | 609 | void BufferCache<P>::PopAsyncFlushes() { |
| 610 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 611 | PopAsyncBuffers(); | ||
| 612 | } | ||
| 1008 | 613 | ||
| 1009 | template <class P> | 614 | template <class P> |
| 1010 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 615 | void BufferCache<P>::PopAsyncBuffers() { |
| 1011 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 616 | if (async_buffers.empty()) { |
| 1012 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 617 | return; |
| 1013 | const BufferId image_id = page_table[page]; | 618 | } |
| 1014 | if (!image_id) { | 619 | if (!async_buffers.front().has_value()) { |
| 1015 | ++page; | 620 | async_buffers.pop_front(); |
| 1016 | continue; | 621 | return; |
| 1017 | } | 622 | } |
| 1018 | Buffer& buffer = slot_buffers[image_id]; | 623 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 1019 | if (buffer.IsRegionGpuModified(addr, size)) { | 624 | auto& downloads = pending_downloads.front(); |
| 1020 | return true; | 625 | auto& async_buffer = async_buffers.front(); |
| 626 | u8* base = async_buffer->mapped_span.data(); | ||
| 627 | const size_t base_offset = async_buffer->offset; | ||
| 628 | for (const auto& copy : downloads) { | ||
| 629 | const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); | ||
| 630 | const u64 dst_offset = copy.dst_offset - base_offset; | ||
| 631 | const u8* read_mapped_memory = base + dst_offset; | ||
| 632 | ForEachInOverlapCounter( | ||
| 633 | async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { | ||
| 634 | cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], | ||
| 635 | end - start); | ||
| 636 | if (count == 1) { | ||
| 637 | const IntervalType base_interval{start, end}; | ||
| 638 | common_ranges.subtract(base_interval); | ||
| 639 | } | ||
| 640 | }); | ||
| 641 | const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | ||
| 642 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | ||
| 1021 | } | 643 | } |
| 1022 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 644 | async_buffers_death_ring.emplace_back(*async_buffer); |
| 1023 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 645 | async_buffers.pop_front(); |
| 646 | pending_downloads.pop_front(); | ||
| 1024 | } | 647 | } |
| 1025 | return false; | 648 | } |
| 649 | |||
| 650 | template <class P> | ||
| 651 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 652 | bool is_dirty = false; | ||
| 653 | ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); | ||
| 654 | return is_dirty; | ||
| 1026 | } | 655 | } |
| 1027 | 656 | ||
| 1028 | template <class P> | 657 | template <class P> |
| 1029 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | 658 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { |
| 1030 | const VAddr end_addr = addr + size; | 659 | const VAddr end_addr = addr + size; |
| 1031 | const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 660 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1032 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 661 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { |
| 1033 | const BufferId buffer_id = page_table[page]; | 662 | const BufferId buffer_id = page_table[page]; |
| 1034 | if (!buffer_id) { | 663 | if (!buffer_id) { |
| 1035 | ++page; | 664 | ++page; |
| @@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 1041 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | 670 | if (buf_start_addr < end_addr && addr < buf_end_addr) { |
| 1042 | return true; | 671 | return true; |
| 1043 | } | 672 | } |
| 1044 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 673 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1045 | } | 674 | } |
| 1046 | return false; | 675 | return false; |
| 1047 | } | 676 | } |
| 1048 | 677 | ||
| 1049 | template <class P> | 678 | template <class P> |
| 1050 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 679 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { |
| 1051 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 680 | return memory_tracker.IsRegionCpuModified(addr, size); |
| 1052 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 1053 | const BufferId image_id = page_table[page]; | ||
| 1054 | if (!image_id) { | ||
| 1055 | ++page; | ||
| 1056 | continue; | ||
| 1057 | } | ||
| 1058 | Buffer& buffer = slot_buffers[image_id]; | ||
| 1059 | if (buffer.IsRegionCpuModified(addr, size)) { | ||
| 1060 | return true; | ||
| 1061 | } | ||
| 1062 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 1063 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 1064 | } | ||
| 1065 | return false; | ||
| 1066 | } | 681 | } |
| 1067 | 682 | ||
| 1068 | template <class P> | 683 | template <class P> |
| @@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 1072 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 687 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 1073 | const u32 size = index_buffer.size; | 688 | const u32 size = index_buffer.size; |
| 1074 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 689 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1075 | if (!draw_state.inline_index_draw_indexes.empty()) { | 690 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1076 | if constexpr (USE_MEMORY_MAPS) { | 691 | if constexpr (USE_MEMORY_MAPS) { |
| 1077 | auto upload_staging = runtime.UploadStagingBuffer(size); | 692 | auto upload_staging = runtime.UploadStagingBuffer(size); |
| 1078 | std::array<BufferCopy, 1> copies{ | 693 | std::array<BufferCopy, 1> copies{ |
| @@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 1155 | TouchBuffer(buffer, binding.buffer_id); | 770 | TouchBuffer(buffer, binding.buffer_id); |
| 1156 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 771 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 1157 | size <= uniform_buffer_skip_cache_size && | 772 | size <= uniform_buffer_skip_cache_size && |
| 1158 | !buffer.IsRegionGpuModified(cpu_addr, size); | 773 | !memory_tracker.IsRegionGpuModified(cpu_addr, size); |
| 1159 | if (use_fast_buffer) { | 774 | if (use_fast_buffer) { |
| 1160 | if constexpr (IS_OPENGL) { | 775 | if constexpr (IS_OPENGL) { |
| 1161 | if (runtime.HasFastBufferSubData()) { | 776 | if (runtime.HasFastBufferSubData()) { |
| @@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1378 | // We have to check for the dirty flags and index count | 993 | // We have to check for the dirty flags and index count |
| 1379 | // The index count is currently changed without updating the dirty flags | 994 | // The index count is currently changed without updating the dirty flags |
| 1380 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 995 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1381 | const auto& index_array = draw_state.index_buffer; | 996 | const auto& index_buffer_ref = draw_state.index_buffer; |
| 1382 | auto& flags = maxwell3d->dirty.flags; | 997 | auto& flags = maxwell3d->dirty.flags; |
| 1383 | if (!flags[Dirty::IndexBuffer]) { | 998 | if (!flags[Dirty::IndexBuffer]) { |
| 1384 | return; | 999 | return; |
| 1385 | } | 1000 | } |
| 1386 | flags[Dirty::IndexBuffer] = false; | 1001 | flags[Dirty::IndexBuffer] = false; |
| 1387 | last_index_count = index_array.count; | 1002 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1388 | if (!draw_state.inline_index_draw_indexes.empty()) { | ||
| 1389 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); | 1003 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); |
| 1004 | u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE); | ||
| 1005 | if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] { | ||
| 1006 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1007 | } | ||
| 1008 | if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] { | ||
| 1009 | slot_buffers.erase(inline_buffer_id); | ||
| 1010 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1011 | } | ||
| 1390 | index_buffer = Binding{ | 1012 | index_buffer = Binding{ |
| 1391 | .cpu_addr = 0, | 1013 | .cpu_addr = 0, |
| 1392 | .size = inline_index_size, | 1014 | .size = inline_index_size, |
| 1393 | .buffer_id = CreateBuffer(0, inline_index_size), | 1015 | .buffer_id = inline_buffer_id, |
| 1394 | }; | 1016 | }; |
| 1395 | return; | 1017 | return; |
| 1396 | } | 1018 | } |
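Instead of creating a fresh buffer for every inline index draw, the rewritten path keeps one persistent inline_buffer_id and only recreates it when the page-aligned request outgrows it. A rough sketch of that grow-only pattern (ScratchBuffer and its members are illustrative, not the cache's API):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct ScratchBuffer {
        std::vector<std::uint8_t> storage;

        // Round the request up to the caching page size and reallocate only
        // when the current buffer is too small, so back-to-back inline
        // draws keep reusing one allocation.
        std::uint8_t* Obtain(std::size_t wanted_size) {
            constexpr std::size_t page = std::size_t{1} << 16;
            const std::size_t aligned = (wanted_size + page - 1) & ~(page - 1);
            if (storage.size() < aligned) {
                storage.resize(aligned);
            }
            return storage.data();
        }
    };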
| 1397 | const GPUVAddr gpu_addr_begin = index_array.StartAddress(); | 1019 | |
| 1398 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | 1020 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); |
| 1021 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); | ||
| 1399 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1022 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1400 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1023 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1401 | const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); | 1024 | const u32 draw_size = |
| 1025 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); | ||
| 1402 | const u32 size = std::min(address_size, draw_size); | 1026 | const u32 size = std::min(address_size, draw_size); |
| 1403 | if (size == 0 || !cpu_addr) { | 1027 | if (size == 0 || !cpu_addr) { |
| 1404 | index_buffer = NULL_BINDING; | 1028 | index_buffer = NULL_BINDING; |
| @@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1434 | const GPUVAddr gpu_addr_begin = array.Address(); | 1058 | const GPUVAddr gpu_addr_begin = array.Address(); |
| 1435 | const GPUVAddr gpu_addr_end = limit.Address() + 1; | 1059 | const GPUVAddr gpu_addr_end = limit.Address() + 1; |
| 1436 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1060 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1437 | u32 address_size = static_cast<u32>( | 1061 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1438 | std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); | 1062 | u32 size = address_size; // TODO: Analyze stride and number of vertices |
| 1439 | if (array.enable == 0 || address_size == 0 || !cpu_addr) { | 1063 | if (array.enable == 0 || size == 0 || !cpu_addr) { |
| 1440 | vertex_buffers[index] = NULL_BINDING; | 1064 | vertex_buffers[index] = NULL_BINDING; |
| 1441 | return; | 1065 | return; |
| 1442 | } | 1066 | } |
| 1443 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1067 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1444 | address_size = | 1068 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1445 | static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size)); | ||
| 1446 | } | 1069 | } |
| 1447 | const u32 size = address_size; // TODO: Analyze stride and number of vertices | ||
| 1448 | vertex_buffers[index] = Binding{ | 1070 | vertex_buffers[index] = Binding{ |
| 1449 | .cpu_addr = *cpu_addr, | 1071 | .cpu_addr = *cpu_addr, |
| 1450 | .size = size, | 1072 | .size = size, |
| @@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { | |||
| 1591 | 1213 | ||
| 1592 | template <class P> | 1214 | template <class P> |
| 1593 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1215 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { |
| 1594 | Buffer& buffer = slot_buffers[buffer_id]; | 1216 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); |
| 1595 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1217 | |
| 1218 | if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) { | ||
| 1219 | SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size); | ||
| 1220 | } | ||
| 1596 | 1221 | ||
| 1597 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1222 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; |
| 1598 | common_ranges.add(base_interval); | 1223 | common_ranges.add(base_interval); |
| 1599 | |||
| 1600 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 1601 | if (!is_async) { | ||
| 1602 | return; | ||
| 1603 | } | ||
| 1604 | uncommitted_ranges.add(base_interval); | 1224 | uncommitted_ranges.add(base_interval); |
| 1225 | pending_ranges.add(base_interval); | ||
| 1605 | } | 1226 | } |
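MarkWrittenBuffer now records every GPU write in several boost::icl interval sets (common_ranges, uncommitted_ranges, pending_ranges), which coalesce overlapping spans automatically. A small self-contained example of that coalescing behaviour, assuming only the boost.ICL headers the cache already uses:

    #include <cstdint>
    #include <iostream>
    #include <boost/icl/interval_set.hpp>

    int main() {
        using IntervalSet = boost::icl::interval_set<std::uint64_t>;
        using Interval = IntervalSet::interval_type;

        IntervalSet written;
        written.add(Interval{0x1000, 0x2000}); // first GPU write
        written.add(Interval{0x1800, 0x3000}); // overlapping write coalesces

        for (const auto& range : written) {
            std::cout << std::hex << range.lower() << "-" << range.upper() << '\n';
        }
        // Prints a single merged range: 1000-3000
    }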
| 1606 | 1227 | ||
| 1607 | template <class P> | 1228 | template <class P> |
| @@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | |||
| 1609 | if (cpu_addr == 0) { | 1230 | if (cpu_addr == 0) { |
| 1610 | return NULL_BUFFER_ID; | 1231 | return NULL_BUFFER_ID; |
| 1611 | } | 1232 | } |
| 1612 | const u64 page = cpu_addr >> YUZU_PAGEBITS; | 1233 | const u64 page = cpu_addr >> CACHING_PAGEBITS; |
| 1613 | const BufferId buffer_id = page_table[page]; | 1234 | const BufferId buffer_id = page_table[page]; |
| 1614 | if (!buffer_id) { | 1235 | if (!buffer_id) { |
| 1615 | return CreateBuffer(cpu_addr, size); | 1236 | return CreateBuffer(cpu_addr, size); |
| @@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1638 | .has_stream_leap = has_stream_leap, | 1259 | .has_stream_leap = has_stream_leap, |
| 1639 | }; | 1260 | }; |
| 1640 | } | 1261 | } |
| 1641 | for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); | 1262 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1642 | cpu_addr += YUZU_PAGESIZE) { | 1263 | cpu_addr += CACHING_PAGESIZE) { |
| 1643 | const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; | 1264 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; |
| 1644 | if (!overlap_id) { | 1265 | if (!overlap_id) { |
| 1645 | continue; | 1266 | continue; |
| 1646 | } | 1267 | } |
| @@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1666 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1287 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1667 | has_stream_leap = true; | 1288 | has_stream_leap = true; |
| 1668 | if (expands_right) { | 1289 | if (expands_right) { |
| 1669 | begin -= YUZU_PAGESIZE * 256; | 1290 | begin -= CACHING_PAGESIZE * 256; |
| 1670 | cpu_addr = begin; | 1291 | cpu_addr = begin; |
| 1671 | } | 1292 | } |
| 1672 | if (expands_left) { | 1293 | if (expands_left) { |
| 1673 | end += YUZU_PAGESIZE * 256; | 1294 | end += CACHING_PAGESIZE * 256; |
| 1674 | } | 1295 | } |
| 1675 | } | 1296 | } |
| 1676 | } | 1297 | } |
| @@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1690 | if (accumulate_stream_score) { | 1311 | if (accumulate_stream_score) { |
| 1691 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); | 1312 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); |
| 1692 | } | 1313 | } |
| 1693 | std::vector<BufferCopy> copies; | 1314 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1694 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); | 1315 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); |
| 1695 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { | 1316 | copies.push_back(BufferCopy{ |
| 1696 | copies.push_back(BufferCopy{ | 1317 | .src_offset = 0, |
| 1697 | .src_offset = begin, | 1318 | .dst_offset = dst_base_offset, |
| 1698 | .dst_offset = dst_base_offset + begin, | 1319 | .size = overlap.SizeBytes(), |
| 1699 | .size = range_size, | ||
| 1700 | }); | ||
| 1701 | new_buffer.UnmarkRegionAsCpuModified(begin, range_size); | ||
| 1702 | new_buffer.MarkRegionAsGpuModified(begin, range_size); | ||
| 1703 | }); | 1320 | }); |
| 1704 | if (!copies.empty()) { | 1321 | runtime.CopyBuffer(new_buffer, overlap, copies); |
| 1705 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | 1322 | DeleteBuffer(overlap_id, true); |
| 1706 | } | ||
| 1707 | DeleteBuffer(overlap_id); | ||
| 1708 | } | 1323 | } |
| 1709 | 1324 | ||
| 1710 | template <class P> | 1325 | template <class P> |
| 1711 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1326 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { |
| 1327 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | ||
| 1328 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | ||
| 1329 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | ||
| 1712 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1330 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1713 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1331 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1714 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1332 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| @@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1718 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1336 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1719 | } | 1337 | } |
| 1720 | Register(new_buffer_id); | 1338 | Register(new_buffer_id); |
| 1721 | TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); | 1339 | TouchBuffer(new_buffer, new_buffer_id); |
| 1722 | return new_buffer_id; | 1340 | return new_buffer_id; |
| 1723 | } | 1341 | } |
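CreateBuffer now snaps each request outward to caching-page boundaries before resolving overlaps, so every cached buffer begins and ends on a 64 KiB boundary. A compile-time sketch of that alignment math (AlignDown/AlignUp here are local stand-ins for the Common helpers):

    #include <cstdint>

    constexpr std::uint64_t PAGE = std::uint64_t{1} << 16; // CACHING_PAGESIZE

    constexpr std::uint64_t AlignDown(std::uint64_t v) {
        return v & ~(PAGE - 1);
    }
    constexpr std::uint64_t AlignUp(std::uint64_t v) {
        return AlignDown(v + PAGE - 1);
    }

    // A request at 0x12345 of 0x100 bytes expands to the full enclosing page:
    static_assert(AlignDown(0x12345) == 0x10000);
    static_assert(AlignUp(0x12345 + 0x100) == 0x20000);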
| 1724 | 1342 | ||
| @@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1746 | } | 1364 | } |
| 1747 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1365 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1748 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1366 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| 1749 | const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; | 1367 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; |
| 1750 | const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); | 1368 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); |
| 1751 | for (u64 page = page_begin; page != page_end; ++page) { | 1369 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1752 | if constexpr (insert) { | 1370 | if constexpr (insert) { |
| 1753 | page_table[page] = buffer_id; | 1371 | page_table[page] = buffer_id; |
| @@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1766 | 1384 | ||
| 1767 | template <class P> | 1385 | template <class P> |
| 1768 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1386 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1769 | if (buffer.CpuAddr() == 0) { | ||
| 1770 | return true; | ||
| 1771 | } | ||
| 1772 | return SynchronizeBufferImpl(buffer, cpu_addr, size); | 1387 | return SynchronizeBufferImpl(buffer, cpu_addr, size); |
| 1773 | } | 1388 | } |
| 1774 | 1389 | ||
| @@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1777 | boost::container::small_vector<BufferCopy, 4> copies; | 1392 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1778 | u64 total_size_bytes = 0; | 1393 | u64 total_size_bytes = 0; |
| 1779 | u64 largest_copy = 0; | 1394 | u64 largest_copy = 0; |
| 1780 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1395 | VAddr buffer_start = buffer.CpuAddr(); |
| 1396 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1781 | copies.push_back(BufferCopy{ | 1397 | copies.push_back(BufferCopy{ |
| 1782 | .src_offset = total_size_bytes, | 1398 | .src_offset = total_size_bytes, |
| 1783 | .dst_offset = range_offset, | 1399 | .dst_offset = cpu_addr_out - buffer_start, |
| 1784 | .size = range_size, | 1400 | .size = range_size, |
| 1785 | }); | 1401 | }); |
| 1786 | total_size_bytes += range_size; | 1402 | total_size_bytes += range_size; |
| @@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1795 | } | 1411 | } |
| 1796 | 1412 | ||
| 1797 | template <class P> | 1413 | template <class P> |
| 1414 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1415 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1416 | u64 total_size_bytes = 0; | ||
| 1417 | u64 largest_copy = 0; | ||
| 1418 | IntervalSet found_sets{}; | ||
| 1419 | auto make_copies = [&] { | ||
| 1420 | for (auto& interval : found_sets) { | ||
| 1421 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1422 | const VAddr cpu_addr_ = interval.lower(); | ||
| 1423 | copies.push_back(BufferCopy{ | ||
| 1424 | .src_offset = total_size_bytes, | ||
| 1425 | .dst_offset = cpu_addr_ - buffer.CpuAddr(), | ||
| 1426 | .size = sub_size, | ||
| 1427 | }); | ||
| 1428 | total_size_bytes += sub_size; | ||
| 1429 | largest_copy = std::max(largest_copy, sub_size); | ||
| 1430 | } | ||
| 1431 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1432 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1433 | }; | ||
| 1434 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1435 | const VAddr base_addr = cpu_addr_out; | ||
| 1436 | const VAddr end_addr = base_addr + range_size; | ||
| 1437 | const IntervalType add_interval{base_addr, end_addr}; | ||
| 1438 | found_sets.add(add_interval); | ||
| 1439 | }); | ||
| 1440 | if (found_sets.empty()) { | ||
| 1441 | return true; | ||
| 1442 | } | ||
| 1443 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1444 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1445 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1446 | if (it == common_ranges.end()) { | ||
| 1447 | make_copies(); | ||
| 1448 | return false; | ||
| 1449 | } | ||
| 1450 | while (it != it_end) { | ||
| 1451 | found_sets.subtract(*it); | ||
| 1452 | it++; | ||
| 1453 | } | ||
| 1454 | make_copies(); | ||
| 1455 | return false; | ||
| 1456 | } | ||
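The core of SynchronizeBufferNoModified is set arithmetic: gather the CPU-dirty ranges reported by the tracker, then subtract anything in common_ranges so GPU-written data is never overwritten by a CPU upload. A minimal sketch of that subtraction with boost.ICL (PendingUploads is an illustrative name):

    #include <cstdint>
    #include <boost/icl/interval_set.hpp>

    using IntervalSet = boost::icl::interval_set<std::uint64_t>;

    // Ranges the CPU dirtied, minus spans the GPU already wrote: only the
    // remainder is safe to upload without clobbering GPU results.
    IntervalSet PendingUploads(const IntervalSet& cpu_dirty,
                               const IntervalSet& gpu_written) {
        IntervalSet result = cpu_dirty;
        result -= gpu_written;
        return result;
    }

The real function additionally restricts the subtraction to the queried window via lower_bound/upper_bound rather than walking the whole set.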
| 1457 | |||
| 1458 | template <class P> | ||
| 1798 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1459 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1799 | std::span<BufferCopy> copies) { | 1460 | std::span<BufferCopy> copies) { |
| 1800 | if constexpr (USE_MEMORY_MAPS) { | 1461 | if constexpr (USE_MEMORY_MAPS) { |
| @@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg | |||
| 1805 | } | 1466 | } |
| 1806 | 1467 | ||
| 1807 | template <class P> | 1468 | template <class P> |
| 1808 | void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | 1469 | void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1809 | std::span<const BufferCopy> copies) { | 1470 | [[maybe_unused]] u64 largest_copy, |
| 1810 | std::span<u8> immediate_buffer; | 1471 | [[maybe_unused]] std::span<const BufferCopy> copies) { |
| 1811 | for (const BufferCopy& copy : copies) { | 1472 | if constexpr (!USE_MEMORY_MAPS) { |
| 1812 | std::span<const u8> upload_span; | 1473 | std::span<u8> immediate_buffer; |
| 1813 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1474 | for (const BufferCopy& copy : copies) { |
| 1814 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1475 | std::span<const u8> upload_span; |
| 1815 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1476 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1816 | } else { | 1477 | if (IsRangeGranular(cpu_addr, copy.size)) { |
| 1817 | if (immediate_buffer.empty()) { | 1478 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); |
| 1818 | immediate_buffer = ImmediateBuffer(largest_copy); | 1479 | } else { |
| 1480 | if (immediate_buffer.empty()) { | ||
| 1481 | immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1482 | } | ||
| 1483 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1484 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1819 | } | 1485 | } |
| 1820 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1486 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| 1821 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1822 | } | 1487 | } |
| 1823 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | ||
| 1824 | } | 1488 | } |
| 1825 | } | 1489 | } |
| 1826 | 1490 | ||
| 1827 | template <class P> | 1491 | template <class P> |
| 1828 | void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | 1492 | void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1829 | std::span<BufferCopy> copies) { | 1493 | [[maybe_unused]] u64 total_size_bytes, |
| 1830 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); | 1494 | [[maybe_unused]] std::span<BufferCopy> copies) { |
| 1831 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1495 | if constexpr (USE_MEMORY_MAPS) { |
| 1832 | for (BufferCopy& copy : copies) { | 1496 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); |
| 1833 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1497 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1834 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1498 | for (BufferCopy& copy : copies) { |
| 1835 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1499 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1500 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1501 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | ||
| 1836 | 1502 | ||
| 1837 | // Apply the staging offset | 1503 | // Apply the staging offset |
| 1838 | copy.src_offset += upload_staging.offset; | 1504 | copy.src_offset += upload_staging.offset; |
| 1505 | } | ||
| 1506 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1839 | } | 1507 | } |
| 1840 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1841 | } | 1508 | } |
| 1842 | 1509 | ||
| 1843 | template <class P> | 1510 | template <class P> |
| @@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1847 | if (!is_dirty) { | 1514 | if (!is_dirty) { |
| 1848 | return false; | 1515 | return false; |
| 1849 | } | 1516 | } |
| 1850 | if (!IsRegionGpuModified(dest_address, copy_size)) { | 1517 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); |
| 1518 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | ||
| 1519 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | ||
| 1851 | return false; | 1520 | return false; |
| 1852 | } | 1521 | } |
| 1853 | 1522 | ||
| @@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1886 | boost::container::small_vector<BufferCopy, 1> copies; | 1555 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1887 | u64 total_size_bytes = 0; | 1556 | u64 total_size_bytes = 0; |
| 1888 | u64 largest_copy = 0; | 1557 | u64 largest_copy = 0; |
| 1889 | buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1558 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1890 | const VAddr buffer_addr = buffer.CpuAddr(); | 1559 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { |
| 1891 | const auto add_download = [&](VAddr start, VAddr end) { | 1560 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 1892 | const u64 new_offset = start - buffer_addr; | 1561 | const auto add_download = [&](VAddr start, VAddr end) { |
| 1893 | const u64 new_size = end - start; | 1562 | const u64 new_offset = start - buffer_addr; |
| 1894 | copies.push_back(BufferCopy{ | 1563 | const u64 new_size = end - start; |
| 1895 | .src_offset = new_offset, | 1564 | copies.push_back(BufferCopy{ |
| 1896 | .dst_offset = total_size_bytes, | 1565 | .src_offset = new_offset, |
| 1897 | .size = new_size, | 1566 | .dst_offset = total_size_bytes, |
| 1898 | }); | 1567 | .size = new_size, |
| 1899 | // Align up to avoid cache conflicts | 1568 | }); |
| 1900 | constexpr u64 align = 256ULL; | 1569 | // Align up to avoid cache conflicts |
| 1901 | constexpr u64 mask = ~(align - 1ULL); | 1570 | constexpr u64 align = 64ULL; |
| 1902 | total_size_bytes += (new_size + align - 1) & mask; | 1571 | constexpr u64 mask = ~(align - 1ULL); |
| 1903 | largest_copy = std::max(largest_copy, new_size); | 1572 | total_size_bytes += (new_size + align - 1) & mask; |
| 1904 | }; | 1573 | largest_copy = std::max(largest_copy, new_size); |
| 1905 | 1574 | }; | |
| 1906 | const VAddr start_address = buffer_addr + range_offset; | 1575 | |
| 1907 | const VAddr end_address = start_address + range_size; | 1576 | const VAddr start_address = cpu_addr_out; |
| 1908 | ForEachWrittenRange(start_address, range_size, add_download); | 1577 | const VAddr end_address = start_address + range_size; |
| 1909 | const IntervalType subtract_interval{start_address, end_address}; | 1578 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1910 | ClearDownload(subtract_interval); | 1579 | const IntervalType subtract_interval{start_address, end_address}; |
| 1911 | common_ranges.subtract(subtract_interval); | 1580 | ClearDownload(subtract_interval); |
| 1912 | }); | 1581 | common_ranges.subtract(subtract_interval); |
| 1582 | }); | ||
| 1913 | if (total_size_bytes == 0) { | 1583 | if (total_size_bytes == 0) { |
| 1914 | return; | 1584 | return; |
| 1915 | } | 1585 | } |
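Downloaded ranges are packed back-to-back into a single staging buffer, with each chunk's offset rounded up (now to 64 bytes rather than 256) so neighbouring copies do not share a cache line. A sketch of that packing arithmetic (Copy and PackCopies are illustrative names):

    #include <cstdint>
    #include <vector>

    struct Copy {
        std::uint64_t src_offset;
        std::uint64_t dst_offset;
        std::uint64_t size;
    };

    // Pack each downloaded range into a staging buffer, rounding every
    // chunk's offset up to 64 bytes; returns the staging size needed.
    std::uint64_t PackCopies(const std::vector<std::uint64_t>& range_sizes,
                             std::vector<Copy>& copies) {
        constexpr std::uint64_t align = 64;
        constexpr std::uint64_t mask = ~(align - 1);
        std::uint64_t total = 0;
        for (const std::uint64_t size : range_sizes) {
            copies.push_back(Copy{.src_offset = 0, .dst_offset = total, .size = size});
            total += (size + align - 1) & mask;
        }
        return total;
    }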
| @@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1943 | } | 1613 | } |
| 1944 | 1614 | ||
| 1945 | template <class P> | 1615 | template <class P> |
| 1946 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1616 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { |
| 1947 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1617 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1948 | if (binding.buffer_id == buffer_id) { | 1618 | if (binding.buffer_id == buffer_id) { |
| 1949 | binding.buffer_id = BufferId{}; | 1619 | binding.buffer_id = BufferId{}; |
| @@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1962 | std::erase(cached_write_buffer_ids, buffer_id); | 1632 | std::erase(cached_write_buffer_ids, buffer_id); |
| 1963 | 1633 | ||
| 1964 | // Mark the whole buffer as CPU written to stop tracking CPU writes | 1634 | // Mark the whole buffer as CPU written to stop tracking CPU writes |
| 1965 | Buffer& buffer = slot_buffers[buffer_id]; | 1635 | if (!do_not_mark) { |
| 1966 | buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | 1636 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1637 | memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1638 | } | ||
| 1967 | 1639 | ||
| 1968 | Unregister(buffer_id); | 1640 | Unregister(buffer_id); |
| 1969 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1641 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| @@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 2011 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1683 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 2012 | return NULL_BINDING; | 1684 | return NULL_BINDING; |
| 2013 | } | 1685 | } |
| 2014 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1686 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE); |
| 2015 | const Binding binding{ | 1687 | const Binding binding{ |
| 2016 | .cpu_addr = *cpu_addr, | 1688 | .cpu_addr = *cpu_addr, |
| 2017 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), | 1689 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h new file mode 100644 index 000000000..656baa550 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -0,0 +1,580 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 11 | #include <numeric> | ||
| 12 | #include <span> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/container/small_vector.hpp> | ||
| 17 | #define BOOST_NO_MT | ||
| 18 | #include <boost/pool/detail/mutex.hpp> | ||
| 19 | #undef BOOST_NO_MT | ||
| 20 | #include <boost/icl/interval.hpp> | ||
| 21 | #include <boost/icl/interval_base_set.hpp> | ||
| 22 | #include <boost/icl/interval_set.hpp> | ||
| 23 | #include <boost/icl/split_interval_map.hpp> | ||
| 24 | #include <boost/pool/pool.hpp> | ||
| 25 | #include <boost/pool/pool_alloc.hpp> | ||
| 26 | #include <boost/pool/poolfwd.hpp> | ||
| 27 | |||
| 28 | #include "common/common_types.h" | ||
| 29 | #include "common/div_ceil.h" | ||
| 30 | #include "common/literals.h" | ||
| 31 | #include "common/lru_cache.h" | ||
| 32 | #include "common/microprofile.h" | ||
| 33 | #include "common/scope_exit.h" | ||
| 34 | #include "common/settings.h" | ||
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 37 | #include "video_core/control/channel_state_cache.h" | ||
| 38 | #include "video_core/delayed_destruction_ring.h" | ||
| 39 | #include "video_core/dirty_flags.h" | ||
| 40 | #include "video_core/engines/draw_manager.h" | ||
| 41 | #include "video_core/engines/kepler_compute.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | ||
| 46 | #include "video_core/texture_cache/slot_vector.h" | ||
| 47 | #include "video_core/texture_cache/types.h" | ||
| 48 | |||
| 49 | namespace boost { | ||
| 50 | template <typename T> | ||
| 51 | class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; | ||
| 52 | } | ||
| 53 | |||
| 54 | namespace VideoCommon { | ||
| 55 | |||
| 56 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 57 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 58 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 59 | |||
| 60 | using BufferId = SlotId; | ||
| 61 | |||
| 62 | using VideoCore::Surface::PixelFormat; | ||
| 63 | using namespace Common::Literals; | ||
| 64 | |||
| 65 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 66 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 67 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 68 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 69 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 70 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 71 | constexpr u32 NUM_STAGES = 5; | ||
| 72 | |||
| 73 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 74 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 75 | |||
| 76 | enum class ObtainBufferSynchronize : u32 { | ||
| 77 | NoSynchronize = 0, | ||
| 78 | FullSynchronize = 1, | ||
| 79 | SynchronizeNoDirty = 2, | ||
| 80 | }; | ||
| 81 | |||
| 82 | enum class ObtainBufferOperation : u32 { | ||
| 83 | DoNothing = 0, | ||
| 84 | MarkAsWritten = 1, | ||
| 85 | DiscardWrite = 2, | ||
| 86 | MarkQuery = 3, | ||
| 87 | }; | ||
| 88 | |||
| 89 | template <typename P> | ||
| 90 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 91 | // Page size for caching purposes. | ||
| 92 | // This is unrelated to the CPU page size and may be tuned to whatever seems optimal. | ||
| 93 | static constexpr u32 CACHING_PAGEBITS = 16; | ||
| 94 | static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; | ||
| 95 | |||
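Concretely, with 16 page bits every caching page spans 64 KiB; a quick compile-time check of the address-to-page mapping:

    #include <cstdint>

    constexpr std::uint64_t PAGE_BITS = 16; // CACHING_PAGEBITS
    constexpr std::uint64_t PageIndex(std::uint64_t cpu_addr) {
        return cpu_addr >> PAGE_BITS;
    }
    static_assert(PageIndex(0x0000'FFFF) == 0); // last byte of page 0
    static_assert(PageIndex(0x0001'0000) == 1); // first byte of page 1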
| 96 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 97 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 98 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 99 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 100 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 101 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 102 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 103 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 104 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 105 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | ||
| 106 | |||
| 107 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 108 | |||
| 109 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 110 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 111 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 112 | |||
| 113 | // Debug Flags. | ||
| 114 | |||
| 115 | static constexpr bool DISABLE_DOWNLOADS = true; | ||
| 116 | |||
| 117 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 118 | |||
| 119 | using Runtime = typename P::Runtime; | ||
| 120 | using Buffer = typename P::Buffer; | ||
| 121 | using Async_Buffer = typename P::Async_Buffer; | ||
| 122 | using MemoryTracker = typename P::MemoryTracker; | ||
| 123 | |||
| 124 | using IntervalCompare = std::less<VAddr>; | ||
| 125 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | ||
| 126 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | ||
| 127 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 128 | using IntervalType = typename IntervalSet::interval_type; | ||
| 129 | |||
| 130 | template <typename Type> | ||
| 131 | struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { | ||
| 132 | // types | ||
| 133 | typedef counter_add_functor<Type> type; | ||
| 134 | typedef boost::icl::identity_based_inplace_combine<Type> base_type; | ||
| 135 | |||
| 136 | // public member functions | ||
| 137 | void operator()(Type& current, const Type& added) const { | ||
| 138 | current += added; | ||
| 139 | if (current < base_type::identity_element()) { | ||
| 140 | current = base_type::identity_element(); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | // public static functions | ||
| 145 | static void version(Type&){}; | ||
| 146 | }; | ||
| 147 | |||
| 148 | using OverlapCombine = counter_add_functor<int>; | ||
| 149 | using OverlapSection = boost::icl::inter_section<int>; | ||
| 150 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | ||
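OverlapCounter reference-counts in-flight async downloads per address span: adding an interval increments the counter over that range, and the split map keeps a separate segment wherever downloads overlap. A hedged usage sketch with the same boost.ICL type (resulting segment values shown in comments):

    #include <cstdint>
    #include <boost/icl/split_interval_map.hpp>

    int main() {
        using Counter = boost::icl::split_interval_map<std::uint64_t, int>;
        using Interval = Counter::interval_type;

        Counter counter;
        counter.add({Interval{0x0000, 0x4000}, 1});  // download A in flight
        counter.add({Interval{0x2000, 0x6000}, 1});  // download B overlaps A
        // Segments: [0,2000)->1, [2000,4000)->2, [4000,6000)->1

        counter.add({Interval{0x0000, 0x4000}, -1}); // download A completes
        // Segments: [2000,4000)->1, [4000,6000)->1; the zero-valued
        // [0,2000) segment is absorbed by the map's default traits.
    }

The cache's counter_add_functor above additionally clamps counts at the identity element so repeated subtraction cannot go negative.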
| 151 | |||
| 152 | struct Empty {}; | ||
| 153 | |||
| 154 | struct OverlapResult { | ||
| 155 | std::vector<BufferId> ids; | ||
| 156 | VAddr begin; | ||
| 157 | VAddr end; | ||
| 158 | bool has_stream_leap = false; | ||
| 159 | }; | ||
| 160 | |||
| 161 | struct Binding { | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | u32 size{}; | ||
| 164 | BufferId buffer_id; | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct TextureBufferBinding : Binding { | ||
| 168 | PixelFormat format; | ||
| 169 | }; | ||
| 170 | |||
| 171 | static constexpr Binding NULL_BINDING{ | ||
| 172 | .cpu_addr = 0, | ||
| 173 | .size = 0, | ||
| 174 | .buffer_id = NULL_BUFFER_ID, | ||
| 175 | }; | ||
| 176 | |||
| 177 | public: | ||
| 178 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 179 | |||
| 180 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 181 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 182 | |||
| 183 | void TickFrame(); | ||
| 184 | |||
| 185 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 186 | |||
| 187 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 188 | |||
| 189 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 190 | |||
| 191 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 192 | |||
| 193 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 194 | |||
| 195 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 196 | |||
| 197 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 198 | |||
| 199 | void UpdateComputeBuffers(); | ||
| 200 | |||
| 201 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 202 | |||
| 203 | void BindHostStageBuffers(size_t stage); | ||
| 204 | |||
| 205 | void BindHostComputeBuffers(); | ||
| 206 | |||
| 207 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 208 | const UniformBufferSizes* sizes); | ||
| 209 | |||
| 210 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 211 | |||
| 212 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 213 | |||
| 214 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 215 | bool is_written); | ||
| 216 | |||
| 217 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 218 | |||
| 219 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 220 | PixelFormat format, bool is_written, bool is_image); | ||
| 221 | |||
| 222 | void UnbindComputeStorageBuffers(); | ||
| 223 | |||
| 224 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 225 | bool is_written); | ||
| 226 | |||
| 227 | void UnbindComputeTextureBuffers(); | ||
| 228 | |||
| 229 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 230 | bool is_written, bool is_image); | ||
| 231 | |||
| 232 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 233 | ObtainBufferSynchronize sync_info, | ||
| 234 | ObtainBufferOperation post_op); | ||
| 235 | void FlushCachedWrites(); | ||
| 236 | |||
| 237 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 238 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 239 | |||
| 240 | void AccumulateFlushes(); | ||
| 241 | |||
| 242 | /// Return true when the caller should wait for async downloads | ||
| 243 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 244 | |||
| 245 | /// Commit asynchronous downloads | ||
| 246 | void CommitAsyncFlushes(); | ||
| 247 | void CommitAsyncFlushesHigh(); | ||
| 248 | |||
| 249 | /// Pop asynchronous downloads | ||
| 250 | void PopAsyncFlushes(); | ||
| 251 | void PopAsyncBuffers(); | ||
| 252 | |||
| 253 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 254 | |||
| 255 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 256 | |||
| 257 | /// Return true when a CPU region is modified from the GPU | ||
| 258 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 259 | |||
| 260 | /// Return true when a region is registered on the cache | ||
| 261 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 262 | |||
| 263 | /// Return true when a CPU region is modified from the CPU | ||
| 264 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 265 | |||
| 266 | void SetDrawIndirect( | ||
| 267 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 268 | current_draw_indirect = current_draw_indirect_; | ||
| 269 | } | ||
| 270 | |||
| 271 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 272 | |||
| 273 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 274 | |||
| 275 | std::recursive_mutex mutex; | ||
| 276 | Runtime& runtime; | ||
| 277 | |||
| 278 | private: | ||
| 279 | template <typename Func> | ||
| 280 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 281 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 282 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 283 | index += disabled_bits; | ||
| 284 | enabled_mask >>= disabled_bits; | ||
| 285 | func(index); | ||
| 286 | } | ||
| 287 | } | ||
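ForEachEnabledBit visits the index of every set bit in an enable mask, using std::countr_zero to skip whole runs of disabled slots in one step. A standalone usage sketch:

    #include <bit>
    #include <cstdint>
    #include <iostream>

    template <typename Func>
    void ForEachEnabledBit(std::uint32_t mask, Func&& func) {
        for (std::uint32_t index = 0; mask != 0; ++index, mask >>= 1) {
            const int zeros = std::countr_zero(mask); // skip disabled run
            index += static_cast<std::uint32_t>(zeros);
            mask >>= zeros;
            func(index);
        }
    }

    int main() {
        ForEachEnabledBit(0b1010'0100u, [](std::uint32_t i) {
            std::cout << i << ' '; // prints: 2 5 7
        });
    }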
| 288 | |||
| 289 | template <typename Func> | ||
| 290 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 291 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | ||
| 292 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | ||
| 293 | const BufferId buffer_id = page_table[page]; | ||
| 294 | if (!buffer_id) { | ||
| 295 | ++page; | ||
| 296 | continue; | ||
| 297 | } | ||
| 298 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 299 | func(buffer_id, buffer); | ||
| 300 | |||
| 301 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 302 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | template <typename Func> | ||
| 307 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | ||
| 308 | const VAddr start_address = cpu_addr; | ||
| 309 | const VAddr end_address = start_address + size; | ||
| 310 | const IntervalType search_interval{start_address, end_address}; | ||
| 311 | auto it = current_range.lower_bound(search_interval); | ||
| 312 | if (it == current_range.end()) { | ||
| 313 | return; | ||
| 314 | } | ||
| 315 | auto end_it = current_range.upper_bound(search_interval); | ||
| 316 | for (; it != end_it; it++) { | ||
| 317 | VAddr inter_addr_end = it->upper(); | ||
| 318 | VAddr inter_addr = it->lower(); | ||
| 319 | if (inter_addr_end > end_address) { | ||
| 320 | inter_addr_end = end_address; | ||
| 321 | } | ||
| 322 | if (inter_addr < start_address) { | ||
| 323 | inter_addr = start_address; | ||
| 324 | } | ||
| 325 | func(inter_addr, inter_addr_end); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | template <typename Func> | ||
| 330 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||
| 331 | Func&& func) { | ||
| 332 | const VAddr start_address = cpu_addr; | ||
| 333 | const VAddr end_address = start_address + size; | ||
| 334 | const IntervalType search_interval{start_address, end_address}; | ||
| 335 | auto it = current_range.lower_bound(search_interval); | ||
| 336 | if (it == current_range.end()) { | ||
| 337 | return; | ||
| 338 | } | ||
| 339 | auto end_it = current_range.upper_bound(search_interval); | ||
| 340 | for (; it != end_it; it++) { | ||
| 341 | auto& inter = it->first; | ||
| 342 | VAddr inter_addr_end = inter.upper(); | ||
| 343 | VAddr inter_addr = inter.lower(); | ||
| 344 | if (inter_addr_end > end_address) { | ||
| 345 | inter_addr_end = end_address; | ||
| 346 | } | ||
| 347 | if (inter_addr < start_address) { | ||
| 348 | inter_addr = start_address; | ||
| 349 | } | ||
| 350 | func(inter_addr, inter_addr_end, it->second); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | void RemoveEachInOverlapCounter(OverlapCounter& current_range, | ||
| 355 | const IntervalType search_interval, int subtract_value) { | ||
| 356 | bool any_removals = false; | ||
| 357 | current_range.add(std::make_pair(search_interval, subtract_value)); | ||
| 358 | do { | ||
| 359 | any_removals = false; | ||
| 360 | auto it = current_range.lower_bound(search_interval); | ||
| 361 | if (it == current_range.end()) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | auto end_it = current_range.upper_bound(search_interval); | ||
| 365 | for (; it != end_it; it++) { | ||
| 366 | if (it->second <= 0) { | ||
| 367 | any_removals = true; | ||
| 368 | current_range.erase(it); | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | } while (any_removals); | ||
| 373 | } | ||
| 374 | |||
| 375 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 376 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 377 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 378 | } | ||
| 379 | |||
| 380 | void RunGarbageCollector(); | ||
| 381 | |||
| 382 | void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size); | ||
| 383 | |||
| 384 | void BindHostIndexBuffer(); | ||
| 385 | |||
| 386 | void BindHostVertexBuffers(); | ||
| 387 | |||
| 388 | void BindHostDrawIndirectBuffers(); | ||
| 389 | |||
| 390 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 391 | |||
| 392 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 393 | |||
| 394 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 395 | |||
| 396 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 397 | |||
| 398 | void BindHostTransformFeedbackBuffers(); | ||
| 399 | |||
| 400 | void BindHostComputeUniformBuffers(); | ||
| 401 | |||
| 402 | void BindHostComputeStorageBuffers(); | ||
| 403 | |||
| 404 | void BindHostComputeTextureBuffers(); | ||
| 405 | |||
| 406 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 407 | |||
| 408 | void DoUpdateComputeBuffers(); | ||
| 409 | |||
| 410 | void UpdateIndexBuffer(); | ||
| 411 | |||
| 412 | void UpdateVertexBuffers(); | ||
| 413 | |||
| 414 | void UpdateVertexBuffer(u32 index); | ||
| 415 | |||
| 416 | void UpdateDrawIndirect(); | ||
| 417 | |||
| 418 | void UpdateUniformBuffers(size_t stage); | ||
| 419 | |||
| 420 | void UpdateStorageBuffers(size_t stage); | ||
| 421 | |||
| 422 | void UpdateTextureBuffers(size_t stage); | ||
| 423 | |||
| 424 | void UpdateTransformFeedbackBuffers(); | ||
| 425 | |||
| 426 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 427 | |||
| 428 | void UpdateComputeUniformBuffers(); | ||
| 429 | |||
| 430 | void UpdateComputeStorageBuffers(); | ||
| 431 | |||
| 432 | void UpdateComputeTextureBuffers(); | ||
| 433 | |||
| 434 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 435 | |||
| 436 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 437 | |||
| 438 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 439 | |||
| 440 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 441 | |||
| 442 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 443 | |||
| 444 | void Register(BufferId buffer_id); | ||
| 445 | |||
| 446 | void Unregister(BufferId buffer_id); | ||
| 447 | |||
| 448 | template <bool insert> | ||
| 449 | void ChangeRegister(BufferId buffer_id); | ||
| 450 | |||
| 451 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 452 | |||
| 453 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 454 | |||
| 455 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 456 | |||
| 457 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 458 | |||
| 459 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 460 | std::span<BufferCopy> copies); | ||
| 461 | |||
| 462 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 463 | std::span<const BufferCopy> copies); | ||
| 464 | |||
| 465 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 466 | |||
| 467 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 468 | |||
| 469 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 470 | |||
| 471 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | ||
| 472 | |||
| 473 | void NotifyBufferDeletion(); | ||
| 474 | |||
| 475 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 476 | bool is_written) const; | ||
| 477 | |||
| 478 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 479 | PixelFormat format); | ||
| 480 | |||
| 481 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 482 | |||
| 483 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 484 | |||
| 485 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 486 | |||
| 487 | void ClearDownload(IntervalType subtract_interval); | ||
| 488 | |||
| 489 | VideoCore::RasterizerInterface& rasterizer; | ||
| 490 | Core::Memory::Memory& cpu_memory; | ||
| 491 | |||
| 492 | SlotVector<Buffer> slot_buffers; | ||
| 493 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 494 | |||
| 495 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 496 | |||
| 497 | u32 last_index_count = 0; | ||
| 498 | |||
| 499 | Binding index_buffer; | ||
| 500 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 501 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 502 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 503 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 504 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 505 | Binding count_buffer_binding; | ||
| 506 | Binding indirect_buffer_binding; | ||
| 507 | |||
| 508 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 509 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 510 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 511 | |||
| 512 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 513 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 514 | |||
| 515 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 516 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 517 | |||
| 518 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 519 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 520 | u32 enabled_compute_storage_buffers = 0; | ||
| 521 | u32 written_compute_storage_buffers = 0; | ||
| 522 | |||
| 523 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 524 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 525 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 526 | u32 enabled_compute_texture_buffers = 0; | ||
| 527 | u32 written_compute_texture_buffers = 0; | ||
| 528 | u32 image_compute_texture_buffers = 0; | ||
| 529 | |||
| 530 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 531 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 532 | |||
| 533 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 534 | |||
| 535 | bool has_deleted_buffers = false; | ||
| 536 | |||
| 537 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 538 | dirty_uniform_buffers{}; | ||
| 539 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 540 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 541 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 542 | uniform_buffer_binding_sizes{}; | ||
| 543 | |||
| 544 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 545 | |||
| 546 | MemoryTracker memory_tracker; | ||
| 547 | IntervalSet uncommitted_ranges; | ||
| 548 | IntervalSet common_ranges; | ||
| 549 | IntervalSet cached_ranges; | ||
| 550 | IntervalSet pending_ranges; | ||
| 551 | std::deque<IntervalSet> committed_ranges; | ||
| 552 | |||
| 553 | // Async Buffers | ||
| 554 | OverlapCounter async_downloads; | ||
| 555 | std::deque<std::optional<Async_Buffer>> async_buffers; | ||
| 556 | std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads; | ||
| 557 | std::optional<Async_Buffer> current_buffer; | ||
| 558 | |||
| 559 | std::deque<Async_Buffer> async_buffers_death_ring; | ||
| 560 | |||
| 561 | size_t immediate_buffer_capacity = 0; | ||
| 562 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 563 | |||
| 564 | struct LRUItemParams { | ||
| 565 | using ObjectType = BufferId; | ||
| 566 | using TickType = u64; | ||
| 567 | }; | ||
| 568 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 569 | u64 frame_tick = 0; | ||
| 570 | u64 total_used_memory = 0; | ||
| 571 | u64 minimum_memory = 0; | ||
| 572 | u64 critical_memory = 0; | ||
| 573 | BufferId inline_buffer_id; | ||
| 574 | |||
| 575 | bool active_async_buffers = false; | ||
| 576 | |||
| 577 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | ||
| 578 | }; | ||
| 579 | |||
| 580 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h new file mode 100644 index 000000000..4bc59017f --- /dev/null +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -0,0 +1,271 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <deque> | ||
| 9 | #include <limits> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/buffer_cache/word_manager.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | template <class RasterizerInterface> | ||
| 21 | class MemoryTrackerBase { | ||
| 22 | static constexpr size_t MAX_CPU_PAGE_BITS = 39; | ||
| 23 | static constexpr size_t HIGHER_PAGE_BITS = 22; | ||
| 24 | static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS; | ||
| 25 | static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL; | ||
| 26 | static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS); | ||
| 27 | static constexpr size_t MANAGER_POOL_SIZE = 32; | ||
| 28 | static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD; | ||
| 29 | using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>; | ||
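The tracker splits the 39-bit CPU address space into 4 MiB regions (HIGHER_PAGE_BITS = 22), each owned by one WordManager, so dirty state for a sparse address space stays cheap. A compile-time sketch of the region/offset split:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t HIGHER_PAGE_BITS = 22;
    constexpr std::uint64_t HIGHER_PAGE_MASK = (1ULL << HIGHER_PAGE_BITS) - 1;

    constexpr std::size_t RegionIndex(std::uint64_t addr) {
        return static_cast<std::size_t>(addr >> HIGHER_PAGE_BITS);
    }
    constexpr std::uint64_t RegionOffset(std::uint64_t addr) {
        return addr & HIGHER_PAGE_MASK;
    }
    static_assert(RegionIndex(0x0040'0000) == 1);     // second 4 MiB region
    static_assert(RegionOffset(0x0040'0123) == 0x123); // offset within it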
| 30 | |||
| 31 | public: | ||
| 32 | MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {} | ||
| 33 | ~MemoryTrackerBase() = default; | ||
| 34 | |||
| 35 | /// Returns the inclusive CPU modified range as a begin-end pair | ||
| 36 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||
| 37 | u64 query_size) noexcept { | ||
| 38 | return IteratePairs<true>( | ||
| 39 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 40 | return manager->template ModifiedRegion<Type::CPU>(offset, size); | ||
| 41 | }); | ||
| 42 | } | ||
| 43 | |||
| 44 | /// Returns the inclusive GPU modified range as a begin-end pair | ||
| 45 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||
| 46 | u64 query_size) noexcept { | ||
| 47 | return IteratePairs<false>( | ||
| 48 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 49 | return manager->template ModifiedRegion<Type::GPU>(offset, size); | ||
| 50 | }); | ||
| 51 | } | ||
| 52 | |||
| 53 | /// Returns true if a region has been modified from the CPU | ||
| 54 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 55 | return IteratePages<true>( | ||
| 56 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 57 | return manager->template IsRegionModified<Type::CPU>(offset, size); | ||
| 58 | }); | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Returns true if a region has been modified from the GPU | ||
| 62 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 63 | return IteratePages<false>( | ||
| 64 | query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) { | ||
| 65 | return manager->template IsRegionModified<Type::GPU>(offset, size); | ||
| 66 | }); | ||
| 67 | } | ||
| 68 | |||
| 69 | /// Mark region as CPU modified, notifying the rasterizer about this change | ||
| 70 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 71 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 72 | [](Manager* manager, u64 offset, size_t size) { | ||
| 73 | manager->template ChangeRegionState<Type::CPU, true>( | ||
| 74 | manager->GetCpuAddr() + offset, size); | ||
| 75 | }); | ||
| 76 | } | ||
| 77 | |||
| 78 | /// Unmark region as CPU modified, notifying the rasterizer about this change | ||
| 79 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 80 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 81 | [](Manager* manager, u64 offset, size_t size) { | ||
| 82 | manager->template ChangeRegionState<Type::CPU, false>( | ||
| 83 | manager->GetCpuAddr() + offset, size); | ||
| 84 | }); | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Mark region as modified from the host GPU | ||
| 88 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { | ||
| 89 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 90 | [](Manager* manager, u64 offset, size_t size) { | ||
| 91 | manager->template ChangeRegionState<Type::GPU, true>( | ||
| 92 | manager->GetCpuAddr() + offset, size); | ||
| 93 | }); | ||
| 94 | } | ||
| 95 | |||
| 96 | /// Unmark region as modified from the host GPU | ||
| 97 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { | ||
| 98 | IteratePages<true>(dirty_cpu_addr, query_size, | ||
| 99 | [](Manager* manager, u64 offset, size_t size) { | ||
| 100 | manager->template ChangeRegionState<Type::GPU, false>( | ||
| 101 | manager->GetCpuAddr() + offset, size); | ||
| 102 | }); | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Mark region as CPU modified, deferring the CPU-modified state change | ||
| 106 | /// until FlushCachedWrites is called. | ||
| 107 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) { | ||
| 108 | IteratePages<true>( | ||
| 109 | dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) { | ||
| 110 | const VAddr cpu_address = manager->GetCpuAddr() + offset; | ||
| 111 | manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size); | ||
| 112 | cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS)); | ||
| 113 | }); | ||
| 114 | } | ||
| 115 | |||
| 116 | /// Flushes cached CPU writes and notifies the rasterizer about the deltas | ||
| 117 | void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept { | ||
| 118 | IteratePages<false>(query_cpu_addr, query_size, | ||
| 119 | [](Manager* manager, [[maybe_unused]] u64 offset, | ||
| 120 | [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); }); | ||
| 121 | } | ||
| 122 | |||
| 123 | void FlushCachedWrites() noexcept { | ||
| 124 | for (auto id : cached_pages) { | ||
| 125 | top_tier[id]->FlushCachedWrites(); | ||
| 126 | } | ||
| 127 | cached_pages.clear(); | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | ||
| 131 | template <typename Func> | ||
| 132 | void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) { | ||
| 133 | IteratePages<true>(query_cpu_range, query_size, | ||
| 134 | [&func](Manager* manager, u64 offset, size_t size) { | ||
| 135 | manager->template ForEachModifiedRange<Type::CPU, true>( | ||
| 136 | manager->GetCpuAddr() + offset, size, func); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 141 | template <typename Func> | ||
| 142 | void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) { | ||
| 143 | IteratePages<false>(query_cpu_range, query_size, | ||
| 144 | [&func, clear](Manager* manager, u64 offset, size_t size) { | ||
| 145 | if (clear) { | ||
| 146 | manager->template ForEachModifiedRange<Type::GPU, true>( | ||
| 147 | manager->GetCpuAddr() + offset, size, func); | ||
| 148 | } else { | ||
| 149 | manager->template ForEachModifiedRange<Type::GPU, false>( | ||
| 150 | manager->GetCpuAddr() + offset, size, func); | ||
| 151 | } | ||
| 152 | }); | ||
| 153 | } | ||
| 154 | |||
| 155 | template <typename Func> | ||
| 156 | void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) { | ||
| 157 | IteratePages<false>(query_cpu_range, query_size, | ||
| 158 | [&func](Manager* manager, u64 offset, size_t size) { | ||
| 159 | manager->template ForEachModifiedRange<Type::GPU, true>( | ||
| 160 | manager->GetCpuAddr() + offset, size, func); | ||
| 161 | }); | ||
| 162 | } | ||
| 163 | |||
| 164 | private: | ||
| 165 | template <bool create_region_on_fail, typename Func> | ||
| 166 | bool IteratePages(VAddr cpu_address, size_t size, Func&& func) { | ||
| 167 | using FuncReturn = std::invoke_result_t<Func, Manager*, u64, size_t>; | ||
| 168 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 169 | std::size_t remaining_size{size}; | ||
| 170 | std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; | ||
| 171 | u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; | ||
| 172 | while (remaining_size > 0) { | ||
| 173 | const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; | ||
| 174 | auto* manager{top_tier[page_index]}; | ||
| 175 | if (manager) { | ||
| 176 | if constexpr (BOOL_BREAK) { | ||
| 177 | if (func(manager, page_offset, copy_amount)) { | ||
| 178 | return true; | ||
| 179 | } | ||
| 180 | } else { | ||
| 181 | func(manager, page_offset, copy_amount); | ||
| 182 | } | ||
| 183 | } else if constexpr (create_region_on_fail) { | ||
| 184 | CreateRegion(page_index); | ||
| 185 | manager = top_tier[page_index]; | ||
| 186 | if constexpr (BOOL_BREAK) { | ||
| 187 | if (func(manager, page_offset, copy_amount)) { | ||
| 188 | return true; | ||
| 189 | } | ||
| 190 | } else { | ||
| 191 | func(manager, page_offset, copy_amount); | ||
| 192 | } | ||
| 193 | } | ||
| 194 | page_index++; | ||
| 195 | page_offset = 0; | ||
| 196 | remaining_size -= copy_amount; | ||
| 197 | } | ||
| 198 | return false; | ||
| 199 | } | ||
| 200 | |||
| 201 | template <bool create_region_on_fail, typename Func> | ||
| 202 | std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) { | ||
| 203 | std::size_t remaining_size{size}; | ||
| 204 | std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS}; | ||
| 205 | u64 page_offset{cpu_address & HIGHER_PAGE_MASK}; | ||
| 206 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 207 | u64 end = 0; | ||
| 208 | while (remaining_size > 0) { | ||
| 209 | const std::size_t copy_amount{std::min(HIGHER_PAGE_SIZE - page_offset, remaining_size)}; | ||
| 210 | auto* manager{top_tier[page_index]}; | ||
| 211 | const auto execute = [&] { | ||
| 212 | auto [new_begin, new_end] = func(manager, page_offset, copy_amount); | ||
| 213 | if (new_begin != 0 || new_end != 0) { | ||
| 214 | const u64 base_address = page_index << HIGHER_PAGE_BITS; | ||
| 215 | begin = std::min(new_begin + base_address, begin); | ||
| 216 | end = std::max(new_end + base_address, end); | ||
| 217 | } | ||
| 218 | }; | ||
| 219 | if (manager) { | ||
| 220 | execute(); | ||
| 221 | } else if constexpr (create_region_on_fail) { | ||
| 222 | CreateRegion(page_index); | ||
| 223 | manager = top_tier[page_index]; | ||
| 224 | execute(); | ||
| 225 | } | ||
| 226 | page_index++; | ||
| 227 | page_offset = 0; | ||
| 228 | remaining_size -= copy_amount; | ||
| 229 | } | ||
| 230 | if (begin < end) { | ||
| 231 | return std::make_pair(begin, end); | ||
| 232 | } else { | ||
| 233 | return std::make_pair(0ULL, 0ULL); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | void CreateRegion(std::size_t page_index) { | ||
| 238 | const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS; | ||
| 239 | top_tier[page_index] = GetNewManager(base_cpu_addr); | ||
| 240 | } | ||
| 241 | |||
| 242 | Manager* GetNewManager(VAddr base_cpu_address) { | ||
| 243 | const auto on_return = [&] { | ||
| 244 | auto* new_manager = free_managers.front(); | ||
| 245 | new_manager->SetCpuAddress(base_cpu_address); | ||
| 246 | free_managers.pop_front(); | ||
| 247 | return new_manager; | ||
| 248 | }; | ||
| 249 | if (!free_managers.empty()) { | ||
| 250 | return on_return(); | ||
| 251 | } | ||
| 252 | manager_pool.emplace_back(); | ||
| 253 | auto& last_pool = manager_pool.back(); | ||
| 254 | for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) { | ||
| 255 | new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE); | ||
| 256 | free_managers.push_back(&last_pool[i]); | ||
| 257 | } | ||
| 258 | return on_return(); | ||
| 259 | } | ||
| 260 | |||
| 261 | std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool; | ||
| 262 | std::deque<Manager*> free_managers; | ||
| 263 | |||
| 264 | std::array<Manager*, NUM_HIGH_PAGES> top_tier{}; | ||
| 265 | |||
| 266 | std::unordered_set<u32> cached_pages; | ||
| 267 | |||
| 268 | RasterizerInterface* rasterizer = nullptr; | ||
| 269 | }; | ||
| 270 | |||
| 271 | } // namespace VideoCommon | ||
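The tracker above is a two-level scheme: a flat top_tier table maps each HIGHER_PAGE_SIZE-sized slice of the address space to a lazily created Manager, and managers are recycled through a free list fed by pooled allocations. A minimal standalone sketch of that lazy two-tier lookup follows; ToyManager, ToyTracker, and the region constants are illustrative stand-ins, not the header's actual types or values.

    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <deque>

    // Stand-in for one tracked region; the real Manager is the WordManager below.
    struct ToyManager {
        std::uint64_t base_addr = 0;
    };

    class ToyTracker {
    public:
        // Fetch the manager covering 'addr', creating it on first touch,
        // mirroring MemoryTrackerBase::CreateRegion/GetNewManager.
        ToyManager* Touch(std::uint64_t addr) {
            const std::size_t index = addr >> REGION_BITS;
            if (!top_tier[index]) {
                if (free_managers.empty()) {
                    pool.emplace_back();
                    free_managers.push_back(&pool.back());
                }
                ToyManager* manager = free_managers.front();
                free_managers.pop_front();
                manager->base_addr = static_cast<std::uint64_t>(index) << REGION_BITS;
                top_tier[index] = manager;
            }
            return top_tier[index];
        }

    private:
        static constexpr std::size_t REGION_BITS = 22; // illustrative region size
        static constexpr std::size_t NUM_REGIONS = 1 << 10;
        std::deque<ToyManager> pool;
        std::deque<ToyManager*> free_managers;
        std::array<ToyManager*, NUM_REGIONS> top_tier{};
    };

    int main() {
        ToyTracker tracker;
        ToyManager* m = tracker.Touch(0x40'1234);
        std::printf("region base: %#llx\n", static_cast<unsigned long long>(m->base_addr));
    }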
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h new file mode 100644 index 000000000..a42455045 --- /dev/null +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -0,0 +1,462 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <limits> | ||
| 9 | #include <span> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include "common/alignment.h" | ||
| 13 | #include "common/common_funcs.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/div_ceil.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | constexpr u64 PAGES_PER_WORD = 64; | ||
| 21 | constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | ||
| 22 | constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||
| 23 | |||
| 24 | enum class Type { | ||
| 25 | CPU, | ||
| 26 | GPU, | ||
| 27 | CachedCPU, | ||
| 28 | Untracked, | ||
| 29 | }; | ||
| 30 | |||
| 31 | /// Tightly packed words tracking modified pages, with small vector optimization | ||
| 32 | template <size_t stack_words = 1> | ||
| 33 | struct WordsArray { | ||
| 34 | /// Returns the pointer to the words state | ||
| 35 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 36 | return is_short ? stack.data() : heap; | ||
| 37 | } | ||
| 38 | |||
| 39 | /// Returns the pointer to the words state | ||
| 40 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 41 | return is_short ? stack.data() : heap; | ||
| 42 | } | ||
| 43 | |||
| 44 | std::array<u64, stack_words> stack{}; ///< Small buffers storage | ||
| 45 | u64* heap; ///< Pointer to the storage of non-small buffers | ||
| 46 | }; | ||
| 47 | |||
| 48 | template <size_t stack_words = 1> | ||
| 49 | struct Words { | ||
| 50 | explicit Words() = default; | ||
| 51 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||
| 52 | num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD); | ||
| 53 | if (IsShort()) { | ||
| 54 | cpu.stack.fill(~u64{0}); | ||
| 55 | gpu.stack.fill(0); | ||
| 56 | cached_cpu.stack.fill(0); | ||
| 57 | untracked.stack.fill(~u64{0}); | ||
| 58 | } else { | ||
| 59 | // Share one allocation between all four trackers and set their default values | ||
| 60 | u64* const alloc = new u64[num_words * 4]; | ||
| 61 | cpu.heap = alloc; | ||
| 62 | gpu.heap = alloc + num_words; | ||
| 63 | cached_cpu.heap = alloc + num_words * 2; | ||
| 64 | untracked.heap = alloc + num_words * 3; | ||
| 65 | std::fill_n(cpu.heap, num_words, ~u64{0}); | ||
| 66 | std::fill_n(gpu.heap, num_words, 0); | ||
| 67 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 68 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 69 | } | ||
| 70 | // Clean up trailing bits | ||
| 71 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||
| 72 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||
| 73 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||
| 74 | const u64 last_word = (~u64{0} << shift) >> shift; | ||
| 75 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 76 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 77 | } | ||
| 78 | |||
| 79 | ~Words() { | ||
| 80 | Release(); | ||
| 81 | } | ||
| 82 | |||
| 83 | Words& operator=(Words&& rhs) noexcept { | ||
| 84 | Release(); | ||
| 85 | size_bytes = rhs.size_bytes; | ||
| 86 | num_words = rhs.num_words; | ||
| 87 | cpu = rhs.cpu; | ||
| 88 | gpu = rhs.gpu; | ||
| 89 | cached_cpu = rhs.cached_cpu; | ||
| 90 | untracked = rhs.untracked; | ||
| 91 | rhs.cpu.heap = nullptr; | ||
| 92 | return *this; | ||
| 93 | } | ||
| 94 | |||
| 95 | Words(Words&& rhs) noexcept | ||
| 96 | : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||
| 97 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 98 | rhs.cpu.heap = nullptr; | ||
| 99 | } | ||
| 100 | |||
| 101 | Words& operator=(const Words&) = delete; | ||
| 102 | Words(const Words&) = delete; | ||
| 103 | |||
| 104 | /// Returns true when the buffer fits in the small vector optimization | ||
| 105 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 106 | return num_words <= stack_words; | ||
| 107 | } | ||
| 108 | |||
| 109 | /// Returns the number of words of the buffer | ||
| 110 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 111 | return num_words; | ||
| 112 | } | ||
| 113 | |||
| 114 | /// Release buffer resources | ||
| 115 | void Release() { | ||
| 116 | if (!IsShort()) { | ||
| 117 | // The CPU words pointer is the base of the shared heap allocation | ||
| 118 | delete[] cpu.heap; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | template <Type type> | ||
| 123 | std::span<u64> Span() noexcept { | ||
| 124 | if constexpr (type == Type::CPU) { | ||
| 125 | return std::span<u64>(cpu.Pointer(IsShort()), num_words); | ||
| 126 | } else if constexpr (type == Type::GPU) { | ||
| 127 | return std::span<u64>(gpu.Pointer(IsShort()), num_words); | ||
| 128 | } else if constexpr (type == Type::CachedCPU) { | ||
| 129 | return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words); | ||
| 130 | } else if constexpr (type == Type::Untracked) { | ||
| 131 | return std::span<u64>(untracked.Pointer(IsShort()), num_words); | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | template <Type type> | ||
| 136 | std::span<const u64> Span() const noexcept { | ||
| 137 | if constexpr (type == Type::CPU) { | ||
| 138 | return std::span<const u64>(cpu.Pointer(IsShort()), num_words); | ||
| 139 | } else if constexpr (type == Type::GPU) { | ||
| 140 | return std::span<const u64>(gpu.Pointer(IsShort()), num_words); | ||
| 141 | } else if constexpr (type == Type::CachedCPU) { | ||
| 142 | return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words); | ||
| 143 | } else if constexpr (type == Type::Untracked) { | ||
| 144 | return std::span<const u64>(untracked.Pointer(IsShort()), num_words); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | u64 size_bytes = 0; | ||
| 149 | size_t num_words = 0; | ||
| 150 | WordsArray<stack_words> cpu; | ||
| 151 | WordsArray<stack_words> gpu; | ||
| 152 | WordsArray<stack_words> cached_cpu; | ||
| 153 | WordsArray<stack_words> untracked; | ||
| 154 | }; | ||
| 155 | |||
| 156 | template <class RasterizerInterface, size_t stack_words = 1> | ||
| 157 | class WordManager { | ||
| 158 | public: | ||
| 159 | explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes) | ||
| 160 | : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {} | ||
| 161 | |||
| 162 | explicit WordManager() = default; | ||
| 163 | |||
| 164 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 165 | cpu_addr = new_cpu_addr; | ||
| 166 | } | ||
| 167 | |||
| 168 | VAddr GetCpuAddr() const { | ||
| 169 | return cpu_addr; | ||
| 170 | } | ||
| 171 | |||
| 172 | static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { | ||
| 173 | constexpr size_t number_bits = sizeof(u64) * 8; | ||
| 174 | const size_t limit_page_end = number_bits - std::min(page_end, number_bits); | ||
| 175 | u64 bits = (word >> page_start) << page_start; | ||
| 176 | bits = (bits << limit_page_end) >> limit_page_end; | ||
| 177 | return bits; | ||
| 178 | } | ||
| 179 | |||
| 180 | static std::pair<size_t, size_t> GetWordPage(VAddr address) { | ||
| 181 | const size_t converted_address = static_cast<size_t>(address); | ||
| 182 | const size_t word_number = converted_address / BYTES_PER_WORD; | ||
| 183 | const size_t word_offset = converted_address % BYTES_PER_WORD; | ||
| 184 | return std::make_pair(word_number, word_offset / BYTES_PER_PAGE); | ||
| 185 | } | ||
| 186 | |||
| 187 | template <typename Func> | ||
| 188 | void IterateWords(size_t offset, size_t size, Func&& func) const { | ||
| 189 | using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>; | ||
| 190 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 191 | const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL)); | ||
| 192 | const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL)); | ||
| 193 | if (start >= SizeBytes() || end <= start) { | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | auto [start_word, start_page] = GetWordPage(start); | ||
| 197 | auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL); | ||
| 198 | const size_t num_words = NumWords(); | ||
| 199 | start_word = std::min(start_word, num_words); | ||
| 200 | end_word = std::min(end_word, num_words); | ||
| 201 | const size_t diff = end_word - start_word; | ||
| 202 | end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD; | ||
| 203 | end_word = std::min(end_word, num_words); | ||
| 204 | end_page += diff * PAGES_PER_WORD; | ||
| 205 | constexpr u64 base_mask{~0ULL}; | ||
| 206 | for (size_t word_index = start_word; word_index < end_word; word_index++) { | ||
| 207 | const u64 mask = ExtractBits(base_mask, start_page, end_page); | ||
| 208 | start_page = 0; | ||
| 209 | end_page -= PAGES_PER_WORD; | ||
| 210 | if constexpr (BOOL_BREAK) { | ||
| 211 | if (func(word_index, mask)) { | ||
| 212 | return; | ||
| 213 | } | ||
| 214 | } else { | ||
| 215 | func(word_index, mask); | ||
| 216 | } | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | template <typename Func> | ||
| 221 | void IteratePages(u64 mask, Func&& func) const { | ||
| 222 | size_t offset = 0; | ||
| 223 | while (mask != 0) { | ||
| 224 | const size_t empty_bits = std::countr_zero(mask); | ||
| 225 | offset += empty_bits; | ||
| 226 | mask = mask >> empty_bits; | ||
| 227 | |||
| 228 | const size_t continuous_bits = std::countr_one(mask); | ||
| 229 | func(offset, continuous_bits); | ||
| 230 | mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; | ||
| 231 | offset += continuous_bits; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
| 235 | /** | ||
| 236 | * Change the state of a range of pages | ||
| 237 | * | ||
| 238 | * @param dirty_addr Base address to mark or unmark as modified | ||
| 239 | * @param size Size in bytes to mark or unmark as modified | ||
| 240 | */ | ||
| 241 | template <Type type, bool enable> | ||
| 242 | void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) { | ||
| 243 | std::span<u64> state_words = words.template Span<type>(); | ||
| 244 | [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); | ||
| 245 | [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>(); | ||
| 246 | IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) { | ||
| 247 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 248 | NotifyRasterizer<!enable>(index, untracked_words[index], mask); | ||
| 249 | } | ||
| 250 | if constexpr (enable) { | ||
| 251 | state_words[index] |= mask; | ||
| 252 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 253 | untracked_words[index] |= mask; | ||
| 254 | } | ||
| 255 | if constexpr (type == Type::CPU) { | ||
| 256 | cached_words[index] &= ~mask; | ||
| 257 | } | ||
| 258 | } else { | ||
| 259 | if constexpr (type == Type::CPU) { | ||
| 260 | const u64 word = state_words[index] & mask; | ||
| 261 | cached_words[index] &= ~word; | ||
| 262 | } | ||
| 263 | state_words[index] &= ~mask; | ||
| 264 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 265 | untracked_words[index] &= ~mask; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | }); | ||
| 269 | } | ||
| 270 | |||
| 271 | /** | ||
| 272 | * Loop over each page in the given range, clearing those bits and notifying the | ||
| 273 | * rasterizer when requested. Call the given function on each modified range. | ||
| 274 | * | ||
| 275 | * @param query_cpu_range Base CPU address to loop over | ||
| 276 | * @param size Size in bytes of the CPU range to loop over | ||
| 277 | * @param func Function to call for each turned off region | ||
| 278 | */ | ||
| 279 | template <Type type, bool clear, typename Func> | ||
| 280 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | ||
| 281 | static_assert(type != Type::Untracked); | ||
| 282 | |||
| 283 | std::span<u64> state_words = words.template Span<type>(); | ||
| 284 | [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); | ||
| 285 | [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>(); | ||
| 286 | const size_t offset = query_cpu_range - cpu_addr; | ||
| 287 | bool pending = false; | ||
| 288 | size_t pending_offset{}; | ||
| 289 | size_t pending_pointer{}; | ||
| 290 | const auto release = [&]() { | ||
| 291 | func(cpu_addr + pending_offset * BYTES_PER_PAGE, | ||
| 292 | (pending_pointer - pending_offset) * BYTES_PER_PAGE); | ||
| 293 | }; | ||
| 294 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 295 | const u64 word = state_words[index] & mask; | ||
| 296 | if constexpr (clear) { | ||
| 297 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 298 | NotifyRasterizer<true>(index, untracked_words[index], mask); | ||
| 299 | } | ||
| 300 | state_words[index] &= ~mask; | ||
| 301 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 302 | untracked_words[index] &= ~mask; | ||
| 303 | } | ||
| 304 | if constexpr (type == Type::CPU) { | ||
| 305 | cached_words[index] &= ~word; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | const size_t base_offset = index * PAGES_PER_WORD; | ||
| 309 | IteratePages(word, [&](size_t pages_offset, size_t pages_size) { | ||
| 310 | const auto reset = [&]() { | ||
| 311 | pending_offset = base_offset + pages_offset; | ||
| 312 | pending_pointer = base_offset + pages_offset + pages_size; | ||
| 313 | }; | ||
| 314 | if (!pending) { | ||
| 315 | reset(); | ||
| 316 | pending = true; | ||
| 317 | return; | ||
| 318 | } | ||
| 319 | if (pending_pointer == base_offset + pages_offset) { | ||
| 320 | pending_pointer += pages_size; | ||
| 321 | return; | ||
| 322 | } | ||
| 323 | release(); | ||
| 324 | reset(); | ||
| 325 | }); | ||
| 326 | }); | ||
| 327 | if (pending) { | ||
| 328 | release(); | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | /** | ||
| 333 | * Returns true when a region has been modified | ||
| 334 | * | ||
| 335 | * @param offset Offset in bytes from the start of the buffer | ||
| 336 | * @param size Size in bytes of the region to query for modifications | ||
| 337 | */ | ||
| 338 | template <Type type> | ||
| 339 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 340 | static_assert(type != Type::Untracked); | ||
| 341 | |||
| 342 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 343 | bool result = false; | ||
| 344 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 345 | const u64 word = state_words[index] & mask; | ||
| 346 | if (word != 0) { | ||
| 347 | result = true; | ||
| 348 | return true; | ||
| 349 | } | ||
| 350 | return false; | ||
| 351 | }); | ||
| 352 | return result; | ||
| 353 | } | ||
| 354 | |||
| 355 | /** | ||
| 356 | * Returns a begin-end pair with the inclusive modified region | ||
| 357 | * | ||
| 358 | * @param offset Offset in bytes from the start of the buffer | ||
| 359 | * @param size Size in bytes of the region to query for modifications | ||
| 360 | */ | ||
| 361 | template <Type type> | ||
| 362 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 363 | static_assert(type != Type::Untracked); | ||
| 364 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 365 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 366 | u64 end = 0; | ||
| 367 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 368 | const u64 word = state_words[index] & mask; | ||
| 369 | if (word == 0) { | ||
| 370 | return; | ||
| 371 | } | ||
| 372 | const u64 local_page_begin = std::countr_zero(word); | ||
| 373 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 374 | const u64 page_index = index * PAGES_PER_WORD; | ||
| 375 | begin = std::min(begin, page_index + local_page_begin); | ||
| 376 | end = page_index + local_page_end; | ||
| 377 | }); | ||
| 378 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 379 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 380 | } | ||
| 381 | |||
| 382 | /// Returns the number of words of the manager | ||
| 383 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 384 | return words.NumWords(); | ||
| 385 | } | ||
| 386 | |||
| 387 | /// Returns the size in bytes of the manager | ||
| 388 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 389 | return words.size_bytes; | ||
| 390 | } | ||
| 391 | |||
| 392 | /// Returns true when the buffer fits in the small vector optimization | ||
| 393 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 394 | return words.IsShort(); | ||
| 395 | } | ||
| 396 | |||
| 397 | void FlushCachedWrites() noexcept { | ||
| 398 | const u64 num_words = NumWords(); | ||
| 399 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 400 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 401 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 402 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 403 | const u64 cached_bits = cached_words[word_index]; | ||
| 404 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 405 | untracked_words[word_index] |= cached_bits; | ||
| 406 | cpu_words[word_index] |= cached_bits; | ||
| 407 | cached_words[word_index] = 0; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | private: | ||
| 412 | template <Type type> | ||
| 413 | u64* Array() noexcept { | ||
| 414 | if constexpr (type == Type::CPU) { | ||
| 415 | return words.cpu.Pointer(IsShort()); | ||
| 416 | } else if constexpr (type == Type::GPU) { | ||
| 417 | return words.gpu.Pointer(IsShort()); | ||
| 418 | } else if constexpr (type == Type::CachedCPU) { | ||
| 419 | return words.cached_cpu.Pointer(IsShort()); | ||
| 420 | } else if constexpr (type == Type::Untracked) { | ||
| 421 | return words.untracked.Pointer(IsShort()); | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | template <Type type> | ||
| 426 | const u64* Array() const noexcept { | ||
| 427 | if constexpr (type == Type::CPU) { | ||
| 428 | return words.cpu.Pointer(IsShort()); | ||
| 429 | } else if constexpr (type == Type::GPU) { | ||
| 430 | return words.gpu.Pointer(IsShort()); | ||
| 431 | } else if constexpr (type == Type::CachedCPU) { | ||
| 432 | return words.cached_cpu.Pointer(IsShort()); | ||
| 433 | } else if constexpr (type == Type::Untracked) { | ||
| 434 | return words.untracked.Pointer(IsShort()); | ||
| 435 | } | ||
| 436 | } | ||
| 437 | |||
| 438 | /** | ||
| 439 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 440 | * | ||
| 441 | * @param word_index Index to the word to notify to the rasterizer | ||
| 442 | * @param current_bits Current state of the word | ||
| 443 | * @param new_bits New state of the word | ||
| 444 | * | ||
| 445 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 446 | */ | ||
| 447 | template <bool add_to_rasterizer> | ||
| 448 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 449 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 450 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 451 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | ||
| 452 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||
| 453 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | ||
| 454 | }); | ||
| 455 | } | ||
| 456 | |||
| 457 | VAddr cpu_addr = 0; | ||
| 458 | RasterizerInterface* rasterizer = nullptr; | ||
| 459 | Words<stack_words> words; | ||
| 460 | }; | ||
| 461 | |||
| 462 | } // namespace VideoCommon | ||
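WordManager stores one dirty bit per page, 64 pages to a u64 word, and IteratePages above decodes a word into contiguous dirty runs with std::countr_zero/std::countr_one. A standalone sketch of that run walk, with WalkRuns as an illustrative name for the same loop:

    #include <bit>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Visit each contiguous run of set bits in 'mask' as (first page, page count),
    // mirroring WordManager::IteratePages.
    template <typename Func>
    void WalkRuns(std::uint64_t mask, Func&& func) {
        std::size_t offset = 0;
        while (mask != 0) {
            const auto zeros = static_cast<std::size_t>(std::countr_zero(mask));
            offset += zeros;
            mask >>= zeros;
            const auto ones = static_cast<std::size_t>(std::countr_one(mask));
            func(offset, ones);
            // Guard the shift: shifting a u64 by 64 would be undefined behavior.
            mask = ones < 64 ? (mask >> ones) : 0;
            offset += ones;
        }
    }

    int main() {
        // Pages 1-2 and 5-8 dirty: prints runs (1, 2) and (5, 4).
        WalkRuns(0b1'1110'0110, [](std::size_t page, std::size_t count) {
            std::printf("run: first page %zu, %zu pages\n", page, count);
        });
    }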
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a8c3f8b67..18d3c3ac0 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -200,6 +201,8 @@ private: | |||
| 200 | struct BufferCacheParams { | 201 | struct BufferCacheParams { |
| 201 | using Runtime = OpenGL::BufferCacheRuntime; | 202 | using Runtime = OpenGL::BufferCacheRuntime; |
| 202 | using Buffer = OpenGL::Buffer; | 203 | using Buffer = OpenGL::Buffer; |
| 204 | using Async_Buffer = u32; | ||
| 205 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 203 | 206 | ||
| 204 | static constexpr bool IS_OPENGL = true; | 207 | static constexpr bool IS_OPENGL = true; |
| 205 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | 208 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |
| @@ -208,6 +211,7 @@ struct BufferCacheParams { | |||
| 208 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 211 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 209 | static constexpr bool USE_MEMORY_MAPS = false; | 212 | static constexpr bool USE_MEMORY_MAPS = false; |
| 210 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | 213 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; |
| 214 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||
| 211 | }; | 215 | }; |
| 212 | 216 | ||
| 213 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 217 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
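BufferCacheParams is a compile-time policy block: the shared VideoCommon::BufferCache<P> template reads these constants and can branch with if constexpr, so a backend like OpenGL (IMPLEMENTS_ASYNC_DOWNLOADS = false) compiles out the async download path entirely. The cache body itself is outside this diff; a minimal sketch of the pattern under illustrative names:

    #include <cstdio>

    struct GLParams {
        static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
    };
    struct VkParams {
        static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
    };

    template <class P>
    struct ExampleCache {
        void CommitDownloads() {
            if constexpr (P::IMPLEMENTS_ASYNC_DOWNLOADS) {
                // Record the copy now; read results back after a fence signals.
                std::puts("deferred readback");
            } else {
                // No async support: block until the data is host-visible.
                std::puts("immediate readback");
            }
        }
    };

    int main() {
        ExampleCache<GLParams>{}.CommitDownloads();
        ExampleCache<VkParams>{}.CommitDownloads();
    }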
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp new file mode 100644 index 000000000..f15ae8e25 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>; | ||
| 9 | } | ||
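This new translation unit exists to explicitly instantiate BufferCache<OpenGL::BufferCacheParams> exactly once, instead of letting every file that includes the header re-instantiate the large template; the Vulkan counterpart below does the same. Presumably the cache header pairs these with an extern template declaration, which this diff does not show. The general two-file pattern, with illustrative names:

    // counter.h — a class template included by many translation units.
    template <class T>
    struct Counter {
        T total{};
        void Add(T value) { total += value; }
    };
    // Suppress implicit instantiation; counter.cpp provides the definition.
    extern template struct Counter<int>;

    // counter.cpp — the single translation unit that instantiates it.
    // #include "counter.h"
    template struct Counter<int>;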
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 9cbcb3c8f..510602e8e 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | |||
| 314 | return staging_pool.Request(size, MemoryUsage::Upload); | 314 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 315 | } | 315 | } |
| 316 | 316 | ||
| 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { |
| 318 | return staging_pool.Request(size, MemoryUsage::Download); | 318 | return staging_pool.Request(size, MemoryUsage::Download, deferred); |
| 319 | } | ||
| 320 | |||
| 321 | void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { | ||
| 322 | staging_pool.FreeDeferred(ref); | ||
| 319 | } | 323 | } |
| 320 | 324 | ||
| 321 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { | 325 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 183b33632..879f1ed94 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -3,7 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "video_core/buffer_cache/buffer_cache.h" | 6 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 7 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 9 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -75,7 +76,9 @@ public: | |||
| 75 | 76 | ||
| 76 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 77 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 77 | 78 | ||
| 78 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 79 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); |
| 80 | |||
| 81 | void FreeDeferredStagingBuffer(StagingBufferRef& ref); | ||
| 79 | 82 | ||
| 80 | void PreCopyBarrier(); | 83 | void PreCopyBarrier(); |
| 81 | 84 | ||
| @@ -142,6 +145,8 @@ private: | |||
| 142 | struct BufferCacheParams { | 145 | struct BufferCacheParams { |
| 143 | using Runtime = Vulkan::BufferCacheRuntime; | 146 | using Runtime = Vulkan::BufferCacheRuntime; |
| 144 | using Buffer = Vulkan::Buffer; | 147 | using Buffer = Vulkan::Buffer; |
| 148 | using Async_Buffer = Vulkan::StagingBufferRef; | ||
| 149 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 145 | 150 | ||
| 146 | static constexpr bool IS_OPENGL = false; | 151 | static constexpr bool IS_OPENGL = false; |
| 147 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | 152 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |
| @@ -150,6 +155,7 @@ struct BufferCacheParams { | |||
| 150 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 155 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 151 | static constexpr bool USE_MEMORY_MAPS = true; | 156 | static constexpr bool USE_MEMORY_MAPS = true; |
| 152 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | 157 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; |
| 158 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||
| 153 | }; | 159 | }; |
| 154 | 160 | ||
| 155 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 161 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
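The deferred flag on DownloadStagingBuffer plus FreeDeferredStagingBuffer lets staging memory outlive the call that requested it: the cache records the GPU copy now and returns the buffer to the pool only after the asynchronous readback has been consumed. A toy model of that lifetime; only the two method names come from this diff, the queue and everything else is illustrative:

    #include <cstddef>
    #include <cstdio>
    #include <deque>

    struct StagingBufferRef {
        std::size_t id;
    };

    class ToyRuntime {
    public:
        StagingBufferRef DownloadStagingBuffer(std::size_t size, bool deferred = false) {
            std::printf("alloc staging #%zu (%zu bytes, deferred=%d)\n", next_id, size,
                        static_cast<int>(deferred));
            return StagingBufferRef{next_id++};
        }
        void FreeDeferredStagingBuffer(StagingBufferRef& ref) {
            std::printf("recycle staging #%zu\n", ref.id);
        }

    private:
        std::size_t next_id = 0;
    };

    int main() {
        ToyRuntime runtime;
        std::deque<StagingBufferRef> pending;
        // Record async downloads; each deferred ref keeps its memory alive.
        pending.push_back(runtime.DownloadStagingBuffer(4096, true));
        pending.push_back(runtime.DownloadStagingBuffer(8192, true));
        // Later, once the GPU work is known to be done, consume and recycle.
        for (StagingBufferRef& ref : pending) {
            runtime.FreeDeferredStagingBuffer(ref);
        }
    }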
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp new file mode 100644 index 000000000..f9e271507 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>; | ||
| 9 | } | ||