diff options
Diffstat (limited to 'src')
59 files changed, 3775 insertions, 2006 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 312a49f42..5e3a74c0f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt | |||
| @@ -113,6 +113,9 @@ else() | |||
| 113 | 113 | ||
| 114 | $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init> | 114 | $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init> |
| 115 | $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field> | 115 | $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field> |
| 116 | $<$<CXX_COMPILER_ID:Clang>:-Werror=shadow-uncaptured-local> | ||
| 117 | $<$<CXX_COMPILER_ID:Clang>:-Werror=implicit-fallthrough> | ||
| 118 | $<$<CXX_COMPILER_ID:Clang>:-Werror=type-limits> | ||
| 116 | $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init> | 119 | $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init> |
| 117 | $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field> | 120 | $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field> |
| 118 | ) | 121 | ) |
diff --git a/src/common/intrusive_list.h b/src/common/intrusive_list.h new file mode 100644 index 000000000..d330dc1c2 --- /dev/null +++ b/src/common/intrusive_list.h | |||
| @@ -0,0 +1,631 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/common_funcs.h" | ||
| 7 | #include "common/parent_of_member.h" | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | // Forward declare implementation class for Node. | ||
| 12 | namespace impl { | ||
| 13 | |||
| 14 | class IntrusiveListImpl; | ||
| 15 | |||
| 16 | } | ||
| 17 | |||
| 18 | class IntrusiveListNode { | ||
| 19 | YUZU_NON_COPYABLE(IntrusiveListNode); | ||
| 20 | |||
| 21 | private: | ||
| 22 | friend class impl::IntrusiveListImpl; | ||
| 23 | |||
| 24 | IntrusiveListNode* m_prev; | ||
| 25 | IntrusiveListNode* m_next; | ||
| 26 | |||
| 27 | public: | ||
| 28 | constexpr IntrusiveListNode() : m_prev(this), m_next(this) {} | ||
| 29 | |||
| 30 | constexpr bool IsLinked() const { | ||
| 31 | return m_next != this; | ||
| 32 | } | ||
| 33 | |||
| 34 | private: | ||
| 35 | constexpr void LinkPrev(IntrusiveListNode* node) { | ||
| 36 | // We can't link an already linked node. | ||
| 37 | ASSERT(!node->IsLinked()); | ||
| 38 | this->SplicePrev(node, node); | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr void SplicePrev(IntrusiveListNode* first, IntrusiveListNode* last) { | ||
| 42 | // Splice a range into the list. | ||
| 43 | auto last_prev = last->m_prev; | ||
| 44 | first->m_prev = m_prev; | ||
| 45 | last_prev->m_next = this; | ||
| 46 | m_prev->m_next = first; | ||
| 47 | m_prev = last_prev; | ||
| 48 | } | ||
| 49 | |||
| 50 | constexpr void LinkNext(IntrusiveListNode* node) { | ||
| 51 | // We can't link an already linked node. | ||
| 52 | ASSERT(!node->IsLinked()); | ||
| 53 | return this->SpliceNext(node, node); | ||
| 54 | } | ||
| 55 | |||
| 56 | constexpr void SpliceNext(IntrusiveListNode* first, IntrusiveListNode* last) { | ||
| 57 | // Splice a range into the list. | ||
| 58 | auto last_prev = last->m_prev; | ||
| 59 | first->m_prev = this; | ||
| 60 | last_prev->m_next = m_next; | ||
| 61 | m_next->m_prev = last_prev; | ||
| 62 | m_next = first; | ||
| 63 | } | ||
| 64 | |||
| 65 | constexpr void Unlink() { | ||
| 66 | this->Unlink(m_next); | ||
| 67 | } | ||
| 68 | |||
| 69 | constexpr void Unlink(IntrusiveListNode* last) { | ||
| 70 | // Unlink a node from a next node. | ||
| 71 | auto last_prev = last->m_prev; | ||
| 72 | m_prev->m_next = last; | ||
| 73 | last->m_prev = m_prev; | ||
| 74 | last_prev->m_next = this; | ||
| 75 | m_prev = last_prev; | ||
| 76 | } | ||
| 77 | |||
| 78 | constexpr IntrusiveListNode* GetPrev() { | ||
| 79 | return m_prev; | ||
| 80 | } | ||
| 81 | |||
| 82 | constexpr const IntrusiveListNode* GetPrev() const { | ||
| 83 | return m_prev; | ||
| 84 | } | ||
| 85 | |||
| 86 | constexpr IntrusiveListNode* GetNext() { | ||
| 87 | return m_next; | ||
| 88 | } | ||
| 89 | |||
| 90 | constexpr const IntrusiveListNode* GetNext() const { | ||
| 91 | return m_next; | ||
| 92 | } | ||
| 93 | }; | ||
| 94 | // DEPRECATED: static_assert(std::is_literal_type<IntrusiveListNode>::value); | ||
| 95 | |||
| 96 | namespace impl { | ||
| 97 | |||
| 98 | class IntrusiveListImpl { | ||
| 99 | YUZU_NON_COPYABLE(IntrusiveListImpl); | ||
| 100 | |||
| 101 | private: | ||
| 102 | IntrusiveListNode m_root_node; | ||
| 103 | |||
| 104 | public: | ||
| 105 | template <bool Const> | ||
| 106 | class Iterator; | ||
| 107 | |||
| 108 | using value_type = IntrusiveListNode; | ||
| 109 | using size_type = size_t; | ||
| 110 | using difference_type = ptrdiff_t; | ||
| 111 | using pointer = value_type*; | ||
| 112 | using const_pointer = const value_type*; | ||
| 113 | using reference = value_type&; | ||
| 114 | using const_reference = const value_type&; | ||
| 115 | using iterator = Iterator<false>; | ||
| 116 | using const_iterator = Iterator<true>; | ||
| 117 | using reverse_iterator = std::reverse_iterator<iterator>; | ||
| 118 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; | ||
| 119 | |||
| 120 | template <bool Const> | ||
| 121 | class Iterator { | ||
| 122 | public: | ||
| 123 | using iterator_category = std::bidirectional_iterator_tag; | ||
| 124 | using value_type = typename IntrusiveListImpl::value_type; | ||
| 125 | using difference_type = typename IntrusiveListImpl::difference_type; | ||
| 126 | using pointer = | ||
| 127 | std::conditional_t<Const, IntrusiveListImpl::const_pointer, IntrusiveListImpl::pointer>; | ||
| 128 | using reference = std::conditional_t<Const, IntrusiveListImpl::const_reference, | ||
| 129 | IntrusiveListImpl::reference>; | ||
| 130 | |||
| 131 | private: | ||
| 132 | pointer m_node; | ||
| 133 | |||
| 134 | public: | ||
| 135 | constexpr explicit Iterator(pointer n) : m_node(n) {} | ||
| 136 | |||
| 137 | constexpr bool operator==(const Iterator& rhs) const { | ||
| 138 | return m_node == rhs.m_node; | ||
| 139 | } | ||
| 140 | |||
| 141 | constexpr pointer operator->() const { | ||
| 142 | return m_node; | ||
| 143 | } | ||
| 144 | |||
| 145 | constexpr reference operator*() const { | ||
| 146 | return *m_node; | ||
| 147 | } | ||
| 148 | |||
| 149 | constexpr Iterator& operator++() { | ||
| 150 | m_node = m_node->m_next; | ||
| 151 | return *this; | ||
| 152 | } | ||
| 153 | |||
| 154 | constexpr Iterator& operator--() { | ||
| 155 | m_node = m_node->m_prev; | ||
| 156 | return *this; | ||
| 157 | } | ||
| 158 | |||
| 159 | constexpr Iterator operator++(int) { | ||
| 160 | const Iterator it{*this}; | ||
| 161 | ++(*this); | ||
| 162 | return it; | ||
| 163 | } | ||
| 164 | |||
| 165 | constexpr Iterator operator--(int) { | ||
| 166 | const Iterator it{*this}; | ||
| 167 | --(*this); | ||
| 168 | return it; | ||
| 169 | } | ||
| 170 | |||
| 171 | constexpr operator Iterator<true>() const { | ||
| 172 | return Iterator<true>(m_node); | ||
| 173 | } | ||
| 174 | |||
| 175 | constexpr Iterator<false> GetNonConstIterator() const { | ||
| 176 | return Iterator<false>(const_cast<IntrusiveListImpl::pointer>(m_node)); | ||
| 177 | } | ||
| 178 | }; | ||
| 179 | |||
| 180 | public: | ||
| 181 | constexpr IntrusiveListImpl() : m_root_node() {} | ||
| 182 | |||
| 183 | // Iterator accessors. | ||
| 184 | constexpr iterator begin() { | ||
| 185 | return iterator(m_root_node.GetNext()); | ||
| 186 | } | ||
| 187 | |||
| 188 | constexpr const_iterator begin() const { | ||
| 189 | return const_iterator(m_root_node.GetNext()); | ||
| 190 | } | ||
| 191 | |||
| 192 | constexpr iterator end() { | ||
| 193 | return iterator(std::addressof(m_root_node)); | ||
| 194 | } | ||
| 195 | |||
| 196 | constexpr const_iterator end() const { | ||
| 197 | return const_iterator(std::addressof(m_root_node)); | ||
| 198 | } | ||
| 199 | |||
| 200 | constexpr iterator iterator_to(reference v) { | ||
| 201 | // Only allow iterator_to for values in lists. | ||
| 202 | ASSERT(v.IsLinked()); | ||
| 203 | return iterator(std::addressof(v)); | ||
| 204 | } | ||
| 205 | |||
| 206 | constexpr const_iterator iterator_to(const_reference v) const { | ||
| 207 | // Only allow iterator_to for values in lists. | ||
| 208 | ASSERT(v.IsLinked()); | ||
| 209 | return const_iterator(std::addressof(v)); | ||
| 210 | } | ||
| 211 | |||
| 212 | // Content management. | ||
| 213 | constexpr bool empty() const { | ||
| 214 | return !m_root_node.IsLinked(); | ||
| 215 | } | ||
| 216 | |||
| 217 | constexpr size_type size() const { | ||
| 218 | return static_cast<size_type>(std::distance(this->begin(), this->end())); | ||
| 219 | } | ||
| 220 | |||
| 221 | constexpr reference back() { | ||
| 222 | return *m_root_node.GetPrev(); | ||
| 223 | } | ||
| 224 | |||
| 225 | constexpr const_reference back() const { | ||
| 226 | return *m_root_node.GetPrev(); | ||
| 227 | } | ||
| 228 | |||
| 229 | constexpr reference front() { | ||
| 230 | return *m_root_node.GetNext(); | ||
| 231 | } | ||
| 232 | |||
| 233 | constexpr const_reference front() const { | ||
| 234 | return *m_root_node.GetNext(); | ||
| 235 | } | ||
| 236 | |||
| 237 | constexpr void push_back(reference node) { | ||
| 238 | m_root_node.LinkPrev(std::addressof(node)); | ||
| 239 | } | ||
| 240 | |||
| 241 | constexpr void push_front(reference node) { | ||
| 242 | m_root_node.LinkNext(std::addressof(node)); | ||
| 243 | } | ||
| 244 | |||
| 245 | constexpr void pop_back() { | ||
| 246 | m_root_node.GetPrev()->Unlink(); | ||
| 247 | } | ||
| 248 | |||
| 249 | constexpr void pop_front() { | ||
| 250 | m_root_node.GetNext()->Unlink(); | ||
| 251 | } | ||
| 252 | |||
| 253 | constexpr iterator insert(const_iterator pos, reference node) { | ||
| 254 | pos.GetNonConstIterator()->LinkPrev(std::addressof(node)); | ||
| 255 | return iterator(std::addressof(node)); | ||
| 256 | } | ||
| 257 | |||
| 258 | constexpr void splice(const_iterator pos, IntrusiveListImpl& o) { | ||
| 259 | splice_impl(pos, o.begin(), o.end()); | ||
| 260 | } | ||
| 261 | |||
| 262 | constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first) { | ||
| 263 | const_iterator last(first); | ||
| 264 | std::advance(last, 1); | ||
| 265 | splice_impl(pos, first, last); | ||
| 266 | } | ||
| 267 | |||
| 268 | constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first, | ||
| 269 | const_iterator last) { | ||
| 270 | splice_impl(pos, first, last); | ||
| 271 | } | ||
| 272 | |||
| 273 | constexpr iterator erase(const_iterator pos) { | ||
| 274 | if (pos == this->end()) { | ||
| 275 | return this->end(); | ||
| 276 | } | ||
| 277 | iterator it(pos.GetNonConstIterator()); | ||
| 278 | (it++)->Unlink(); | ||
| 279 | return it; | ||
| 280 | } | ||
| 281 | |||
| 282 | constexpr void clear() { | ||
| 283 | while (!this->empty()) { | ||
| 284 | this->pop_front(); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | private: | ||
| 289 | constexpr void splice_impl(const_iterator _pos, const_iterator _first, const_iterator _last) { | ||
| 290 | if (_first == _last) { | ||
| 291 | return; | ||
| 292 | } | ||
| 293 | iterator pos(_pos.GetNonConstIterator()); | ||
| 294 | iterator first(_first.GetNonConstIterator()); | ||
| 295 | iterator last(_last.GetNonConstIterator()); | ||
| 296 | first->Unlink(std::addressof(*last)); | ||
| 297 | pos->SplicePrev(std::addressof(*first), std::addressof(*first)); | ||
| 298 | } | ||
| 299 | }; | ||
| 300 | |||
| 301 | } // namespace impl | ||
| 302 | |||
| 303 | template <class T, class Traits> | ||
| 304 | class IntrusiveList { | ||
| 305 | YUZU_NON_COPYABLE(IntrusiveList); | ||
| 306 | |||
| 307 | private: | ||
| 308 | impl::IntrusiveListImpl m_impl; | ||
| 309 | |||
| 310 | public: | ||
| 311 | template <bool Const> | ||
| 312 | class Iterator; | ||
| 313 | |||
| 314 | using value_type = T; | ||
| 315 | using size_type = size_t; | ||
| 316 | using difference_type = ptrdiff_t; | ||
| 317 | using pointer = value_type*; | ||
| 318 | using const_pointer = const value_type*; | ||
| 319 | using reference = value_type&; | ||
| 320 | using const_reference = const value_type&; | ||
| 321 | using iterator = Iterator<false>; | ||
| 322 | using const_iterator = Iterator<true>; | ||
| 323 | using reverse_iterator = std::reverse_iterator<iterator>; | ||
| 324 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; | ||
| 325 | |||
| 326 | template <bool Const> | ||
| 327 | class Iterator { | ||
| 328 | public: | ||
| 329 | friend class Common::IntrusiveList<T, Traits>; | ||
| 330 | |||
| 331 | using ImplIterator = | ||
| 332 | std::conditional_t<Const, Common::impl::IntrusiveListImpl::const_iterator, | ||
| 333 | Common::impl::IntrusiveListImpl::iterator>; | ||
| 334 | |||
| 335 | using iterator_category = std::bidirectional_iterator_tag; | ||
| 336 | using value_type = typename IntrusiveList::value_type; | ||
| 337 | using difference_type = typename IntrusiveList::difference_type; | ||
| 338 | using pointer = | ||
| 339 | std::conditional_t<Const, IntrusiveList::const_pointer, IntrusiveList::pointer>; | ||
| 340 | using reference = | ||
| 341 | std::conditional_t<Const, IntrusiveList::const_reference, IntrusiveList::reference>; | ||
| 342 | |||
| 343 | private: | ||
| 344 | ImplIterator m_iterator; | ||
| 345 | |||
| 346 | private: | ||
| 347 | constexpr explicit Iterator(ImplIterator it) : m_iterator(it) {} | ||
| 348 | |||
| 349 | constexpr ImplIterator GetImplIterator() const { | ||
| 350 | return m_iterator; | ||
| 351 | } | ||
| 352 | |||
| 353 | public: | ||
| 354 | constexpr bool operator==(const Iterator& rhs) const { | ||
| 355 | return m_iterator == rhs.m_iterator; | ||
| 356 | } | ||
| 357 | |||
| 358 | constexpr pointer operator->() const { | ||
| 359 | return std::addressof(Traits::GetParent(*m_iterator)); | ||
| 360 | } | ||
| 361 | |||
| 362 | constexpr reference operator*() const { | ||
| 363 | return Traits::GetParent(*m_iterator); | ||
| 364 | } | ||
| 365 | |||
| 366 | constexpr Iterator& operator++() { | ||
| 367 | ++m_iterator; | ||
| 368 | return *this; | ||
| 369 | } | ||
| 370 | |||
| 371 | constexpr Iterator& operator--() { | ||
| 372 | --m_iterator; | ||
| 373 | return *this; | ||
| 374 | } | ||
| 375 | |||
| 376 | constexpr Iterator operator++(int) { | ||
| 377 | const Iterator it{*this}; | ||
| 378 | ++m_iterator; | ||
| 379 | return it; | ||
| 380 | } | ||
| 381 | |||
| 382 | constexpr Iterator operator--(int) { | ||
| 383 | const Iterator it{*this}; | ||
| 384 | --m_iterator; | ||
| 385 | return it; | ||
| 386 | } | ||
| 387 | |||
| 388 | constexpr operator Iterator<true>() const { | ||
| 389 | return Iterator<true>(m_iterator); | ||
| 390 | } | ||
| 391 | }; | ||
| 392 | |||
| 393 | private: | ||
| 394 | static constexpr IntrusiveListNode& GetNode(reference ref) { | ||
| 395 | return Traits::GetNode(ref); | ||
| 396 | } | ||
| 397 | |||
| 398 | static constexpr IntrusiveListNode const& GetNode(const_reference ref) { | ||
| 399 | return Traits::GetNode(ref); | ||
| 400 | } | ||
| 401 | |||
| 402 | static constexpr reference GetParent(IntrusiveListNode& node) { | ||
| 403 | return Traits::GetParent(node); | ||
| 404 | } | ||
| 405 | |||
| 406 | static constexpr const_reference GetParent(IntrusiveListNode const& node) { | ||
| 407 | return Traits::GetParent(node); | ||
| 408 | } | ||
| 409 | |||
| 410 | public: | ||
| 411 | constexpr IntrusiveList() : m_impl() {} | ||
| 412 | |||
| 413 | // Iterator accessors. | ||
| 414 | constexpr iterator begin() { | ||
| 415 | return iterator(m_impl.begin()); | ||
| 416 | } | ||
| 417 | |||
| 418 | constexpr const_iterator begin() const { | ||
| 419 | return const_iterator(m_impl.begin()); | ||
| 420 | } | ||
| 421 | |||
| 422 | constexpr iterator end() { | ||
| 423 | return iterator(m_impl.end()); | ||
| 424 | } | ||
| 425 | |||
| 426 | constexpr const_iterator end() const { | ||
| 427 | return const_iterator(m_impl.end()); | ||
| 428 | } | ||
| 429 | |||
| 430 | constexpr const_iterator cbegin() const { | ||
| 431 | return this->begin(); | ||
| 432 | } | ||
| 433 | |||
| 434 | constexpr const_iterator cend() const { | ||
| 435 | return this->end(); | ||
| 436 | } | ||
| 437 | |||
| 438 | constexpr reverse_iterator rbegin() { | ||
| 439 | return reverse_iterator(this->end()); | ||
| 440 | } | ||
| 441 | |||
| 442 | constexpr const_reverse_iterator rbegin() const { | ||
| 443 | return const_reverse_iterator(this->end()); | ||
| 444 | } | ||
| 445 | |||
| 446 | constexpr reverse_iterator rend() { | ||
| 447 | return reverse_iterator(this->begin()); | ||
| 448 | } | ||
| 449 | |||
| 450 | constexpr const_reverse_iterator rend() const { | ||
| 451 | return const_reverse_iterator(this->begin()); | ||
| 452 | } | ||
| 453 | |||
| 454 | constexpr const_reverse_iterator crbegin() const { | ||
| 455 | return this->rbegin(); | ||
| 456 | } | ||
| 457 | |||
| 458 | constexpr const_reverse_iterator crend() const { | ||
| 459 | return this->rend(); | ||
| 460 | } | ||
| 461 | |||
| 462 | constexpr iterator iterator_to(reference v) { | ||
| 463 | return iterator(m_impl.iterator_to(GetNode(v))); | ||
| 464 | } | ||
| 465 | |||
| 466 | constexpr const_iterator iterator_to(const_reference v) const { | ||
| 467 | return const_iterator(m_impl.iterator_to(GetNode(v))); | ||
| 468 | } | ||
| 469 | |||
| 470 | // Content management. | ||
| 471 | constexpr bool empty() const { | ||
| 472 | return m_impl.empty(); | ||
| 473 | } | ||
| 474 | |||
| 475 | constexpr size_type size() const { | ||
| 476 | return m_impl.size(); | ||
| 477 | } | ||
| 478 | |||
| 479 | constexpr reference back() { | ||
| 480 | return GetParent(m_impl.back()); | ||
| 481 | } | ||
| 482 | |||
| 483 | constexpr const_reference back() const { | ||
| 484 | return GetParent(m_impl.back()); | ||
| 485 | } | ||
| 486 | |||
| 487 | constexpr reference front() { | ||
| 488 | return GetParent(m_impl.front()); | ||
| 489 | } | ||
| 490 | |||
| 491 | constexpr const_reference front() const { | ||
| 492 | return GetParent(m_impl.front()); | ||
| 493 | } | ||
| 494 | |||
| 495 | constexpr void push_back(reference ref) { | ||
| 496 | m_impl.push_back(GetNode(ref)); | ||
| 497 | } | ||
| 498 | |||
| 499 | constexpr void push_front(reference ref) { | ||
| 500 | m_impl.push_front(GetNode(ref)); | ||
| 501 | } | ||
| 502 | |||
| 503 | constexpr void pop_back() { | ||
| 504 | m_impl.pop_back(); | ||
| 505 | } | ||
| 506 | |||
| 507 | constexpr void pop_front() { | ||
| 508 | m_impl.pop_front(); | ||
| 509 | } | ||
| 510 | |||
| 511 | constexpr iterator insert(const_iterator pos, reference ref) { | ||
| 512 | return iterator(m_impl.insert(pos.GetImplIterator(), GetNode(ref))); | ||
| 513 | } | ||
| 514 | |||
| 515 | constexpr void splice(const_iterator pos, IntrusiveList& o) { | ||
| 516 | m_impl.splice(pos.GetImplIterator(), o.m_impl); | ||
| 517 | } | ||
| 518 | |||
| 519 | constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first) { | ||
| 520 | m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator()); | ||
| 521 | } | ||
| 522 | |||
| 523 | constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first, | ||
| 524 | const_iterator last) { | ||
| 525 | m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator(), | ||
| 526 | last.GetImplIterator()); | ||
| 527 | } | ||
| 528 | |||
| 529 | constexpr iterator erase(const_iterator pos) { | ||
| 530 | return iterator(m_impl.erase(pos.GetImplIterator())); | ||
| 531 | } | ||
| 532 | |||
| 533 | constexpr void clear() { | ||
| 534 | m_impl.clear(); | ||
| 535 | } | ||
| 536 | }; | ||
| 537 | |||
| 538 | template <auto T, class Derived = Common::impl::GetParentType<T>> | ||
| 539 | class IntrusiveListMemberTraits; | ||
| 540 | |||
| 541 | template <class Parent, IntrusiveListNode Parent::*Member, class Derived> | ||
| 542 | class IntrusiveListMemberTraits<Member, Derived> { | ||
| 543 | public: | ||
| 544 | using ListType = IntrusiveList<Derived, IntrusiveListMemberTraits>; | ||
| 545 | |||
| 546 | private: | ||
| 547 | friend class IntrusiveList<Derived, IntrusiveListMemberTraits>; | ||
| 548 | |||
| 549 | static constexpr IntrusiveListNode& GetNode(Derived& parent) { | ||
| 550 | return parent.*Member; | ||
| 551 | } | ||
| 552 | |||
| 553 | static constexpr IntrusiveListNode const& GetNode(Derived const& parent) { | ||
| 554 | return parent.*Member; | ||
| 555 | } | ||
| 556 | |||
| 557 | static Derived& GetParent(IntrusiveListNode& node) { | ||
| 558 | return Common::GetParentReference<Member, Derived>(std::addressof(node)); | ||
| 559 | } | ||
| 560 | |||
| 561 | static Derived const& GetParent(IntrusiveListNode const& node) { | ||
| 562 | return Common::GetParentReference<Member, Derived>(std::addressof(node)); | ||
| 563 | } | ||
| 564 | }; | ||
| 565 | |||
| 566 | template <auto T, class Derived = Common::impl::GetParentType<T>> | ||
| 567 | class IntrusiveListMemberTraitsByNonConstexprOffsetOf; | ||
| 568 | |||
| 569 | template <class Parent, IntrusiveListNode Parent::*Member, class Derived> | ||
| 570 | class IntrusiveListMemberTraitsByNonConstexprOffsetOf<Member, Derived> { | ||
| 571 | public: | ||
| 572 | using ListType = IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>; | ||
| 573 | |||
| 574 | private: | ||
| 575 | friend class IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>; | ||
| 576 | |||
| 577 | static constexpr IntrusiveListNode& GetNode(Derived& parent) { | ||
| 578 | return parent.*Member; | ||
| 579 | } | ||
| 580 | |||
| 581 | static constexpr IntrusiveListNode const& GetNode(Derived const& parent) { | ||
| 582 | return parent.*Member; | ||
| 583 | } | ||
| 584 | |||
| 585 | static Derived& GetParent(IntrusiveListNode& node) { | ||
| 586 | return *reinterpret_cast<Derived*>(reinterpret_cast<char*>(std::addressof(node)) - | ||
| 587 | GetOffset()); | ||
| 588 | } | ||
| 589 | |||
| 590 | static Derived const& GetParent(IntrusiveListNode const& node) { | ||
| 591 | return *reinterpret_cast<const Derived*>( | ||
| 592 | reinterpret_cast<const char*>(std::addressof(node)) - GetOffset()); | ||
| 593 | } | ||
| 594 | |||
| 595 | static uintptr_t GetOffset() { | ||
| 596 | return reinterpret_cast<uintptr_t>(std::addressof(reinterpret_cast<Derived*>(0)->*Member)); | ||
| 597 | } | ||
| 598 | }; | ||
| 599 | |||
| 600 | template <class Derived> | ||
| 601 | class IntrusiveListBaseNode : public IntrusiveListNode {}; | ||
| 602 | |||
| 603 | template <class Derived> | ||
| 604 | class IntrusiveListBaseTraits { | ||
| 605 | public: | ||
| 606 | using ListType = IntrusiveList<Derived, IntrusiveListBaseTraits>; | ||
| 607 | |||
| 608 | private: | ||
| 609 | friend class IntrusiveList<Derived, IntrusiveListBaseTraits>; | ||
| 610 | |||
| 611 | static constexpr IntrusiveListNode& GetNode(Derived& parent) { | ||
| 612 | return static_cast<IntrusiveListNode&>( | ||
| 613 | static_cast<IntrusiveListBaseNode<Derived>&>(parent)); | ||
| 614 | } | ||
| 615 | |||
| 616 | static constexpr IntrusiveListNode const& GetNode(Derived const& parent) { | ||
| 617 | return static_cast<const IntrusiveListNode&>( | ||
| 618 | static_cast<const IntrusiveListBaseNode<Derived>&>(parent)); | ||
| 619 | } | ||
| 620 | |||
| 621 | static constexpr Derived& GetParent(IntrusiveListNode& node) { | ||
| 622 | return static_cast<Derived&>(static_cast<IntrusiveListBaseNode<Derived>&>(node)); | ||
| 623 | } | ||
| 624 | |||
| 625 | static constexpr Derived const& GetParent(IntrusiveListNode const& node) { | ||
| 626 | return static_cast<const Derived&>( | ||
| 627 | static_cast<const IntrusiveListBaseNode<Derived>&>(node)); | ||
| 628 | } | ||
| 629 | }; | ||
| 630 | |||
| 631 | } // namespace Common | ||
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 84955030b..cb1bca467 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -45,6 +45,7 @@ void LogSettings() { | |||
| 45 | log_setting("System_LanguageIndex", values.language_index.GetValue()); | 45 | log_setting("System_LanguageIndex", values.language_index.GetValue()); |
| 46 | log_setting("System_RegionIndex", values.region_index.GetValue()); | 46 | log_setting("System_RegionIndex", values.region_index.GetValue()); |
| 47 | log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); | 47 | log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue()); |
| 48 | log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue()); | ||
| 48 | log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); | 49 | log_setting("Core_UseMultiCore", values.use_multi_core.GetValue()); |
| 49 | log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); | 50 | log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue()); |
| 50 | log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); | 51 | log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue()); |
| @@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 191 | 192 | ||
| 192 | // Core | 193 | // Core |
| 193 | values.use_multi_core.SetGlobal(true); | 194 | values.use_multi_core.SetGlobal(true); |
| 194 | values.use_extended_memory_layout.SetGlobal(true); | 195 | values.use_unsafe_extended_memory_layout.SetGlobal(true); |
| 195 | 196 | ||
| 196 | // CPU | 197 | // CPU |
| 197 | values.cpu_accuracy.SetGlobal(true); | 198 | values.cpu_accuracy.SetGlobal(true); |
| @@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 205 | // Renderer | 206 | // Renderer |
| 206 | values.fsr_sharpening_slider.SetGlobal(true); | 207 | values.fsr_sharpening_slider.SetGlobal(true); |
| 207 | values.renderer_backend.SetGlobal(true); | 208 | values.renderer_backend.SetGlobal(true); |
| 209 | values.async_presentation.SetGlobal(true); | ||
| 208 | values.renderer_force_max_clock.SetGlobal(true); | 210 | values.renderer_force_max_clock.SetGlobal(true); |
| 209 | values.vulkan_device.SetGlobal(true); | 211 | values.vulkan_device.SetGlobal(true); |
| 210 | values.fullscreen_mode.SetGlobal(true); | 212 | values.fullscreen_mode.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index b77a1580a..adebb0ca7 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -388,7 +388,8 @@ struct Values { | |||
| 388 | 388 | ||
| 389 | // Core | 389 | // Core |
| 390 | SwitchableSetting<bool> use_multi_core{true, "use_multi_core"}; | 390 | SwitchableSetting<bool> use_multi_core{true, "use_multi_core"}; |
| 391 | SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"}; | 391 | SwitchableSetting<bool> use_unsafe_extended_memory_layout{false, |
| 392 | "use_unsafe_extended_memory_layout"}; | ||
| 392 | 393 | ||
| 393 | // Cpu | 394 | // Cpu |
| 394 | SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, | 395 | SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto, |
| @@ -422,6 +423,7 @@ struct Values { | |||
| 422 | // Renderer | 423 | // Renderer |
| 423 | SwitchableSetting<RendererBackend, true> renderer_backend{ | 424 | SwitchableSetting<RendererBackend, true> renderer_backend{ |
| 424 | RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; | 425 | RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; |
| 426 | SwitchableSetting<bool> async_presentation{false, "async_presentation"}; | ||
| 425 | SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"}; | 427 | SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"}; |
| 426 | Setting<bool> renderer_debug{false, "debug"}; | 428 | Setting<bool> renderer_debug{false, "debug"}; |
| 427 | Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; | 429 | Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; |
diff --git a/src/core/core.cpp b/src/core/core.cpp index d7bf2bf51..06fba4ce5 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -137,7 +137,7 @@ struct System::Impl { | |||
| 137 | device_memory = std::make_unique<Core::DeviceMemory>(); | 137 | device_memory = std::make_unique<Core::DeviceMemory>(); |
| 138 | 138 | ||
| 139 | is_multicore = Settings::values.use_multi_core.GetValue(); | 139 | is_multicore = Settings::values.use_multi_core.GetValue(); |
| 140 | extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue(); | 140 | extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue(); |
| 141 | 141 | ||
| 142 | core_timing.SetMulticore(is_multicore); | 142 | core_timing.SetMulticore(is_multicore); |
| 143 | core_timing.Initialize([&system]() { system.RegisterHostThread(); }); | 143 | core_timing.Initialize([&system]() { system.RegisterHostThread(); }); |
| @@ -169,7 +169,7 @@ struct System::Impl { | |||
| 169 | void ReinitializeIfNecessary(System& system) { | 169 | void ReinitializeIfNecessary(System& system) { |
| 170 | const bool must_reinitialize = | 170 | const bool must_reinitialize = |
| 171 | is_multicore != Settings::values.use_multi_core.GetValue() || | 171 | is_multicore != Settings::values.use_multi_core.GetValue() || |
| 172 | extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue(); | 172 | extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue(); |
| 173 | 173 | ||
| 174 | if (!must_reinitialize) { | 174 | if (!must_reinitialize) { |
| 175 | return; | 175 | return; |
| @@ -178,7 +178,7 @@ struct System::Impl { | |||
| 178 | LOG_DEBUG(Kernel, "Re-initializing"); | 178 | LOG_DEBUG(Kernel, "Re-initializing"); |
| 179 | 179 | ||
| 180 | is_multicore = Settings::values.use_multi_core.GetValue(); | 180 | is_multicore = Settings::values.use_multi_core.GetValue(); |
| 181 | extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue(); | 181 | extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue(); |
| 182 | 182 | ||
| 183 | Initialize(system); | 183 | Initialize(system); |
| 184 | } | 184 | } |
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp index 36d0d20d2..49bdc671e 100644 --- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp +++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp | |||
| @@ -35,12 +35,13 @@ namespace { | |||
| 35 | using namespace Common::Literals; | 35 | using namespace Common::Literals; |
| 36 | 36 | ||
| 37 | u32 GetMemorySizeForInit() { | 37 | u32 GetMemorySizeForInit() { |
| 38 | return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB; | 38 | return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB |
| 39 | : Smc::MemorySize_4GB; | ||
| 39 | } | 40 | } |
| 40 | 41 | ||
| 41 | Smc::MemoryArrangement GetMemoryArrangeForInit() { | 42 | Smc::MemoryArrangement GetMemoryArrangeForInit() { |
| 42 | return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB | 43 | return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB |
| 43 | : Smc::MemoryArrangement_4GB; | 44 | : Smc::MemoryArrangement_4GB; |
| 44 | } | 45 | } |
| 45 | } // namespace | 46 | } // namespace |
| 46 | 47 | ||
diff --git a/src/core/hle/kernel/k_event_info.h b/src/core/hle/kernel/k_event_info.h index 25b3ff594..eacfa5dc6 100644 --- a/src/core/hle/kernel/k_event_info.h +++ b/src/core/hle/kernel/k_event_info.h | |||
| @@ -5,14 +5,15 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include <boost/intrusive/list.hpp> | 8 | #include "common/intrusive_list.h" |
| 9 | 9 | ||
| 10 | #include "core/hle/kernel/slab_helpers.h" | 10 | #include "core/hle/kernel/slab_helpers.h" |
| 11 | #include "core/hle/kernel/svc_types.h" | 11 | #include "core/hle/kernel/svc_types.h" |
| 12 | 12 | ||
| 13 | namespace Kernel { | 13 | namespace Kernel { |
| 14 | 14 | ||
| 15 | class KEventInfo : public KSlabAllocated<KEventInfo>, public boost::intrusive::list_base_hook<> { | 15 | class KEventInfo : public KSlabAllocated<KEventInfo>, |
| 16 | public Common::IntrusiveListBaseNode<KEventInfo> { | ||
| 16 | public: | 17 | public: |
| 17 | struct InfoCreateThread { | 18 | struct InfoCreateThread { |
| 18 | u32 thread_id{}; | 19 | u32 thread_id{}; |
diff --git a/src/core/hle/kernel/k_object_name.h b/src/core/hle/kernel/k_object_name.h index 2d97fc777..a8876fe37 100644 --- a/src/core/hle/kernel/k_object_name.h +++ b/src/core/hle/kernel/k_object_name.h | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <boost/intrusive/list.hpp> | 8 | |
| 9 | #include "common/intrusive_list.h" | ||
| 9 | 10 | ||
| 10 | #include "core/hle/kernel/k_light_lock.h" | 11 | #include "core/hle/kernel/k_light_lock.h" |
| 11 | #include "core/hle/kernel/slab_helpers.h" | 12 | #include "core/hle/kernel/slab_helpers.h" |
| @@ -15,13 +16,14 @@ namespace Kernel { | |||
| 15 | 16 | ||
| 16 | class KObjectNameGlobalData; | 17 | class KObjectNameGlobalData; |
| 17 | 18 | ||
| 18 | class KObjectName : public KSlabAllocated<KObjectName>, public boost::intrusive::list_base_hook<> { | 19 | class KObjectName : public KSlabAllocated<KObjectName>, |
| 20 | public Common::IntrusiveListBaseNode<KObjectName> { | ||
| 19 | public: | 21 | public: |
| 20 | explicit KObjectName(KernelCore&) {} | 22 | explicit KObjectName(KernelCore&) {} |
| 21 | virtual ~KObjectName() = default; | 23 | virtual ~KObjectName() = default; |
| 22 | 24 | ||
| 23 | static constexpr size_t NameLengthMax = 12; | 25 | static constexpr size_t NameLengthMax = 12; |
| 24 | using List = boost::intrusive::list<KObjectName>; | 26 | using List = Common::IntrusiveListBaseTraits<KObjectName>::ListType; |
| 25 | 27 | ||
| 26 | static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name); | 28 | static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name); |
| 27 | static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name); | 29 | static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name); |
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h index 21c040e62..625280290 100644 --- a/src/core/hle/kernel/k_server_port.h +++ b/src/core/hle/kernel/k_server_port.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | 9 | ||
| 10 | #include <boost/intrusive/list.hpp> | 10 | #include "common/intrusive_list.h" |
| 11 | 11 | ||
| 12 | #include "core/hle/kernel/k_server_session.h" | 12 | #include "core/hle/kernel/k_server_session.h" |
| 13 | #include "core/hle/kernel/k_synchronization_object.h" | 13 | #include "core/hle/kernel/k_synchronization_object.h" |
| @@ -42,7 +42,7 @@ public: | |||
| 42 | bool IsSignaled() const override; | 42 | bool IsSignaled() const override; |
| 43 | 43 | ||
| 44 | private: | 44 | private: |
| 45 | using SessionList = boost::intrusive::list<KServerSession>; | 45 | using SessionList = Common::IntrusiveListBaseTraits<KServerSession>::ListType; |
| 46 | 46 | ||
| 47 | void CleanupSessions(); | 47 | void CleanupSessions(); |
| 48 | 48 | ||
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h index 5ee02f556..403891919 100644 --- a/src/core/hle/kernel/k_server_session.h +++ b/src/core/hle/kernel/k_server_session.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | 10 | ||
| 11 | #include <boost/intrusive/list.hpp> | 11 | #include "common/intrusive_list.h" |
| 12 | 12 | ||
| 13 | #include "core/hle/kernel/k_light_lock.h" | 13 | #include "core/hle/kernel/k_light_lock.h" |
| 14 | #include "core/hle/kernel/k_session_request.h" | 14 | #include "core/hle/kernel/k_session_request.h" |
| @@ -27,7 +27,7 @@ class KSession; | |||
| 27 | class KThread; | 27 | class KThread; |
| 28 | 28 | ||
| 29 | class KServerSession final : public KSynchronizationObject, | 29 | class KServerSession final : public KSynchronizationObject, |
| 30 | public boost::intrusive::list_base_hook<> { | 30 | public Common::IntrusiveListBaseNode<KServerSession> { |
| 31 | KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject); | 31 | KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject); |
| 32 | 32 | ||
| 33 | friend class ServiceThread; | 33 | friend class ServiceThread; |
| @@ -67,7 +67,8 @@ private: | |||
| 67 | KSession* m_parent{}; | 67 | KSession* m_parent{}; |
| 68 | 68 | ||
| 69 | /// List of threads which are pending a reply. | 69 | /// List of threads which are pending a reply. |
| 70 | boost::intrusive::list<KSessionRequest> m_request_list{}; | 70 | using RequestList = Common::IntrusiveListBaseTraits<KSessionRequest>::ListType; |
| 71 | RequestList m_request_list{}; | ||
| 71 | KSessionRequest* m_current_request{}; | 72 | KSessionRequest* m_current_request{}; |
| 72 | 73 | ||
| 73 | KLightLock m_lock; | 74 | KLightLock m_lock; |
diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h index b5f04907b..283669e0a 100644 --- a/src/core/hle/kernel/k_session_request.h +++ b/src/core/hle/kernel/k_session_request.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include <array> | 6 | #include <array> |
| 7 | 7 | ||
| 8 | #include "common/intrusive_list.h" | ||
| 9 | |||
| 8 | #include "core/hle/kernel/k_auto_object.h" | 10 | #include "core/hle/kernel/k_auto_object.h" |
| 9 | #include "core/hle/kernel/k_event.h" | 11 | #include "core/hle/kernel/k_event.h" |
| 10 | #include "core/hle/kernel/k_memory_block.h" | 12 | #include "core/hle/kernel/k_memory_block.h" |
| @@ -16,7 +18,7 @@ namespace Kernel { | |||
| 16 | 18 | ||
| 17 | class KSessionRequest final : public KSlabAllocated<KSessionRequest>, | 19 | class KSessionRequest final : public KSlabAllocated<KSessionRequest>, |
| 18 | public KAutoObject, | 20 | public KAutoObject, |
| 19 | public boost::intrusive::list_base_hook<> { | 21 | public Common::IntrusiveListBaseNode<KSessionRequest> { |
| 20 | KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject); | 22 | KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject); |
| 21 | 23 | ||
| 22 | public: | 24 | public: |
diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h index 75b73ba39..2d8ff20d6 100644 --- a/src/core/hle/kernel/k_shared_memory_info.h +++ b/src/core/hle/kernel/k_shared_memory_info.h | |||
| @@ -3,7 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <boost/intrusive/list.hpp> | 6 | #include "common/intrusive_list.h" |
| 7 | 7 | ||
| 8 | #include "core/hle/kernel/slab_helpers.h" | 8 | #include "core/hle/kernel/slab_helpers.h" |
| 9 | 9 | ||
| @@ -12,7 +12,7 @@ namespace Kernel { | |||
| 12 | class KSharedMemory; | 12 | class KSharedMemory; |
| 13 | 13 | ||
| 14 | class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>, | 14 | class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>, |
| 15 | public boost::intrusive::list_base_hook<> { | 15 | public Common::IntrusiveListBaseNode<KSharedMemoryInfo> { |
| 16 | 16 | ||
| 17 | public: | 17 | public: |
| 18 | explicit KSharedMemoryInfo(KernelCore&) {} | 18 | explicit KSharedMemoryInfo(KernelCore&) {} |
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h index 9c1a41128..f9814ac8f 100644 --- a/src/core/hle/kernel/k_thread.h +++ b/src/core/hle/kernel/k_thread.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include <vector> | 13 | #include <vector> |
| 14 | 14 | ||
| 15 | #include <boost/intrusive/list.hpp> | 15 | #include "common/intrusive_list.h" |
| 16 | 16 | ||
| 17 | #include "common/intrusive_red_black_tree.h" | 17 | #include "common/intrusive_red_black_tree.h" |
| 18 | #include "common/spin_lock.h" | 18 | #include "common/spin_lock.h" |
| @@ -119,7 +119,7 @@ s32 GetCurrentCoreId(KernelCore& kernel); | |||
| 119 | Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel); | 119 | Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel); |
| 120 | 120 | ||
| 121 | class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>, | 121 | class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>, |
| 122 | public boost::intrusive::list_base_hook<>, | 122 | public Common::IntrusiveListBaseNode<KThread>, |
| 123 | public KTimerTask { | 123 | public KTimerTask { |
| 124 | KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject); | 124 | KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject); |
| 125 | 125 | ||
| @@ -138,7 +138,7 @@ public: | |||
| 138 | public: | 138 | public: |
| 139 | using ThreadContext32 = Core::ARM_Interface::ThreadContext32; | 139 | using ThreadContext32 = Core::ARM_Interface::ThreadContext32; |
| 140 | using ThreadContext64 = Core::ARM_Interface::ThreadContext64; | 140 | using ThreadContext64 = Core::ARM_Interface::ThreadContext64; |
| 141 | using WaiterList = boost::intrusive::list<KThread>; | 141 | using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType; |
| 142 | 142 | ||
| 143 | /** | 143 | /** |
| 144 | * Gets the thread's current priority | 144 | * Gets the thread's current priority |
| @@ -750,8 +750,9 @@ private: | |||
| 750 | ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>; | 750 | ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>; |
| 751 | 751 | ||
| 752 | public: | 752 | public: |
| 753 | class LockWithPriorityInheritanceInfo : public KSlabAllocated<LockWithPriorityInheritanceInfo>, | 753 | class LockWithPriorityInheritanceInfo |
| 754 | public boost::intrusive::list_base_hook<> { | 754 | : public KSlabAllocated<LockWithPriorityInheritanceInfo>, |
| 755 | public Common::IntrusiveListBaseNode<LockWithPriorityInheritanceInfo> { | ||
| 755 | public: | 756 | public: |
| 756 | explicit LockWithPriorityInheritanceInfo(KernelCore&) {} | 757 | explicit LockWithPriorityInheritanceInfo(KernelCore&) {} |
| 757 | 758 | ||
| @@ -839,7 +840,7 @@ public: | |||
| 839 | 840 | ||
| 840 | private: | 841 | private: |
| 841 | using LockWithPriorityInheritanceInfoList = | 842 | using LockWithPriorityInheritanceInfoList = |
| 842 | boost::intrusive::list<LockWithPriorityInheritanceInfo>; | 843 | Common::IntrusiveListBaseTraits<LockWithPriorityInheritanceInfo>::ListType; |
| 843 | 844 | ||
| 844 | ConditionVariableThreadTree* m_condvar_tree{}; | 845 | ConditionVariableThreadTree* m_condvar_tree{}; |
| 845 | u64 m_condvar_key{}; | 846 | u64 m_condvar_key{}; |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 0cd87a48f..fee510f7b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) { | |||
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | void EmitSetSampleMask(EmitContext& ctx, Id value) { | 475 | void EmitSetSampleMask(EmitContext& ctx, Id value) { |
| 476 | ctx.OpStore(ctx.sample_mask, value); | 476 | const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)}; |
| 477 | ctx.OpStore(pointer, value); | ||
| 477 | } | 478 | } |
| 478 | 479 | ||
| 479 | void EmitSetFragDepth(EmitContext& ctx, Id value) { | 480 | void EmitSetFragDepth(EmitContext& ctx, Id value) { |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index d48d4860e..47739794f 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) { | |||
| 1572 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); | 1572 | Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth); |
| 1573 | } | 1573 | } |
| 1574 | if (info.stores_sample_mask) { | 1574 | if (info.stores_sample_mask) { |
| 1575 | sample_mask = DefineOutput(*this, U32[1], std::nullopt); | 1575 | const Id array_type{TypeArray(U32[1], Const(1U))}; |
| 1576 | sample_mask = DefineOutput(*this, array_type, std::nullopt); | ||
| 1576 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); | 1577 | Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask); |
| 1577 | } | 1578 | } |
| 1578 | break; | 1579 | break; |
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 39b774c98..1e158f375 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt | |||
| @@ -15,7 +15,7 @@ add_executable(tests | |||
| 15 | core/core_timing.cpp | 15 | core/core_timing.cpp |
| 16 | core/internal_network/network.cpp | 16 | core/internal_network/network.cpp |
| 17 | precompiled_headers.h | 17 | precompiled_headers.h |
| 18 | video_core/buffer_base.cpp | 18 | video_core/memory_tracker.cpp |
| 19 | input_common/calibration_configuration_job.cpp | 19 | input_common/calibration_configuration_job.cpp |
| 20 | ) | 20 | ) |
| 21 | 21 | ||
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp deleted file mode 100644 index 734dbf4b6..000000000 --- a/src/tests/video_core/buffer_base.cpp +++ /dev/null | |||
| @@ -1,549 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <stdexcept> | ||
| 5 | #include <unordered_map> | ||
| 6 | |||
| 7 | #include <catch2/catch_test_macros.hpp> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | using VideoCommon::BufferBase; | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | |||
| 20 | constexpr VAddr c = 0x1328914000; | ||
| 21 | |||
| 22 | class RasterizerInterface { | ||
| 23 | public: | ||
| 24 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 25 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 26 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 27 | Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 29 | int& value = page_table[page]; | ||
| 30 | value += delta; | ||
| 31 | if (value < 0) { | ||
| 32 | throw std::logic_error{"negative page"}; | ||
| 33 | } | ||
| 34 | if (value == 0) { | ||
| 35 | page_table.erase(page); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 41 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 42 | return it == page_table.end() ? 0 : it->second; | ||
| 43 | } | ||
| 44 | |||
| 45 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 46 | unsigned count = 0; | ||
| 47 | for (const auto& [index, value] : page_table) { | ||
| 48 | count += value; | ||
| 49 | } | ||
| 50 | return count; | ||
| 51 | } | ||
| 52 | |||
| 53 | private: | ||
| 54 | std::unordered_map<u64, int> page_table; | ||
| 55 | }; | ||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | TEST_CASE("BufferBase: Small buffer", "[video_core]") { | ||
| 59 | RasterizerInterface rasterizer; | ||
| 60 | BufferBase buffer(rasterizer, c, WORD); | ||
| 61 | REQUIRE(rasterizer.Count() == 0); | ||
| 62 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 63 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 64 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 65 | |||
| 66 | buffer.MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 67 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2}); | ||
| 68 | } | ||
| 69 | |||
| 70 | TEST_CASE("BufferBase: Large buffer", "[video_core]") { | ||
| 71 | RasterizerInterface rasterizer; | ||
| 72 | BufferBase buffer(rasterizer, c, WORD * 32); | ||
| 73 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 74 | buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 75 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2}); | ||
| 76 | REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8}); | ||
| 77 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE}); | ||
| 78 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE}); | ||
| 79 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 80 | Range{WORD * 3 + PAGE * 63, WORD * 4}); | ||
| 81 | |||
| 82 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 83 | buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 84 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 85 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9}); | ||
| 86 | |||
| 87 | buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 88 | REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 89 | Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7}); | ||
| 90 | |||
| 91 | buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 92 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32}); | ||
| 93 | |||
| 94 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 95 | buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 96 | |||
| 97 | buffer.UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 98 | REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 99 | } | ||
| 100 | |||
| 101 | TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") { | ||
| 102 | RasterizerInterface rasterizer; | ||
| 103 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 104 | REQUIRE(rasterizer.Count() == 0); | ||
| 105 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 106 | REQUIRE(rasterizer.Count() == 1); | ||
| 107 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 108 | REQUIRE(rasterizer.Count() == 0); | ||
| 109 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 110 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 111 | REQUIRE(rasterizer.Count() == 2); | ||
| 112 | buffer.MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 113 | REQUIRE(rasterizer.Count() == 0); | ||
| 114 | } | ||
| 115 | |||
| 116 | TEST_CASE("BufferBase: Basic range", "[video_core]") { | ||
| 117 | RasterizerInterface rasterizer; | ||
| 118 | BufferBase buffer(rasterizer, c, WORD); | ||
| 119 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 120 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 121 | int num = 0; | ||
| 122 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 123 | REQUIRE(offset == 0U); | ||
| 124 | REQUIRE(size == PAGE); | ||
| 125 | ++num; | ||
| 126 | }); | ||
| 127 | REQUIRE(num == 1U); | ||
| 128 | } | ||
| 129 | |||
| 130 | TEST_CASE("BufferBase: Border upload", "[video_core]") { | ||
| 131 | RasterizerInterface rasterizer; | ||
| 132 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 133 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 134 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 135 | buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 136 | REQUIRE(offset == WORD - PAGE); | ||
| 137 | REQUIRE(size == PAGE * 2); | ||
| 138 | }); | ||
| 139 | } | ||
| 140 | |||
| 141 | TEST_CASE("BufferBase: Border upload range", "[video_core]") { | ||
| 142 | RasterizerInterface rasterizer; | ||
| 143 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 144 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 145 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 146 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 147 | REQUIRE(offset == WORD - PAGE); | ||
| 148 | REQUIRE(size == PAGE * 2); | ||
| 149 | }); | ||
| 150 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 151 | buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 152 | REQUIRE(offset == WORD - PAGE); | ||
| 153 | REQUIRE(size == PAGE); | ||
| 154 | }); | ||
| 155 | buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 156 | REQUIRE(offset == WORD); | ||
| 157 | REQUIRE(size == PAGE); | ||
| 158 | }); | ||
| 159 | } | ||
| 160 | |||
| 161 | TEST_CASE("BufferBase: Border upload partial range", "[video_core]") { | ||
| 162 | RasterizerInterface rasterizer; | ||
| 163 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 164 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 165 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 166 | buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 167 | REQUIRE(offset == WORD - PAGE); | ||
| 168 | REQUIRE(size == PAGE * 2); | ||
| 169 | }); | ||
| 170 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 171 | buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 172 | REQUIRE(offset == WORD - PAGE); | ||
| 173 | REQUIRE(size == PAGE); | ||
| 174 | }); | ||
| 175 | buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 176 | REQUIRE(offset == WORD); | ||
| 177 | REQUIRE(size == PAGE); | ||
| 178 | }); | ||
| 179 | } | ||
| 180 | |||
| 181 | TEST_CASE("BufferBase: Partial word uploads", "[video_core]") { | ||
| 182 | RasterizerInterface rasterizer; | ||
| 183 | BufferBase buffer(rasterizer, c, 0x9d000); | ||
| 184 | int num = 0; | ||
| 185 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 186 | REQUIRE(offset == 0U); | ||
| 187 | REQUIRE(size == WORD); | ||
| 188 | ++num; | ||
| 189 | }); | ||
| 190 | REQUIRE(num == 1); | ||
| 191 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 192 | REQUIRE(offset == WORD); | ||
| 193 | REQUIRE(size == WORD); | ||
| 194 | ++num; | ||
| 195 | }); | ||
| 196 | REQUIRE(num == 2); | ||
| 197 | buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 198 | REQUIRE(offset == WORD * 2); | ||
| 199 | REQUIRE(size == PAGE * 0x1d); | ||
| 200 | ++num; | ||
| 201 | }); | ||
| 202 | REQUIRE(num == 3); | ||
| 203 | } | ||
| 204 | |||
| 205 | TEST_CASE("BufferBase: Partial page upload", "[video_core]") { | ||
| 206 | RasterizerInterface rasterizer; | ||
| 207 | BufferBase buffer(rasterizer, c, WORD); | ||
| 208 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 209 | int num = 0; | ||
| 210 | buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 211 | buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 212 | buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 213 | REQUIRE(offset == PAGE * 2); | ||
| 214 | REQUIRE(size == PAGE); | ||
| 215 | ++num; | ||
| 216 | }); | ||
| 217 | REQUIRE(num == 1); | ||
| 218 | buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 219 | REQUIRE(offset == PAGE * 9); | ||
| 220 | REQUIRE(size == PAGE); | ||
| 221 | ++num; | ||
| 222 | }); | ||
| 223 | REQUIRE(num == 2); | ||
| 224 | } | ||
| 225 | |||
| 226 | TEST_CASE("BufferBase: Partial page upload with multiple words on the right") { | ||
| 227 | RasterizerInterface rasterizer; | ||
| 228 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 229 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 230 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 231 | int num = 0; | ||
| 232 | buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 233 | REQUIRE(offset == PAGE * 13); | ||
| 234 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 235 | ++num; | ||
| 236 | }); | ||
| 237 | REQUIRE(num == 1); | ||
| 238 | buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 239 | REQUIRE(offset == WORD * 7 + PAGE * 10); | ||
| 240 | REQUIRE(size == PAGE * 3); | ||
| 241 | ++num; | ||
| 242 | }); | ||
| 243 | REQUIRE(num == 2); | ||
| 244 | } | ||
| 245 | |||
| 246 | TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 247 | RasterizerInterface rasterizer; | ||
| 248 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 249 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 250 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 251 | int num = 0; | ||
| 252 | buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 253 | REQUIRE(offset == PAGE * 16); | ||
| 254 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 255 | ++num; | ||
| 256 | }); | ||
| 257 | REQUIRE(num == 1); | ||
| 258 | buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 259 | REQUIRE(offset == PAGE * 13); | ||
| 260 | REQUIRE(size == PAGE * 3); | ||
| 261 | ++num; | ||
| 262 | }); | ||
| 263 | REQUIRE(num == 2); | ||
| 264 | } | ||
| 265 | |||
| 266 | TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 267 | RasterizerInterface rasterizer; | ||
| 268 | BufferBase buffer(rasterizer, c, WORD * 8); | ||
| 269 | buffer.UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 270 | buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 271 | int num = 0; | ||
| 272 | buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 273 | REQUIRE(offset == PAGE * 16); | ||
| 274 | REQUIRE(size == WORD); | ||
| 275 | ++num; | ||
| 276 | }); | ||
| 277 | REQUIRE(num == 1); | ||
| 278 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 279 | REQUIRE(offset == PAGE * 13); | ||
| 280 | REQUIRE(size == PAGE * 3); | ||
| 281 | ++num; | ||
| 282 | }); | ||
| 283 | REQUIRE(num == 2); | ||
| 284 | buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 285 | REQUIRE(offset == WORD + PAGE * 16); | ||
| 286 | REQUIRE(size == PAGE * 73); | ||
| 287 | ++num; | ||
| 288 | }); | ||
| 289 | REQUIRE(num == 3); | ||
| 290 | } | ||
| 291 | |||
| 292 | TEST_CASE("BufferBase: Empty right bits", "[video_core]") { | ||
| 293 | RasterizerInterface rasterizer; | ||
| 294 | BufferBase buffer(rasterizer, c, WORD * 2048); | ||
| 295 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 296 | buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 297 | buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 298 | REQUIRE(offset == WORD - PAGE); | ||
| 299 | REQUIRE(size == PAGE * 2); | ||
| 300 | }); | ||
| 301 | } | ||
| 302 | |||
| 303 | TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") { | ||
| 304 | RasterizerInterface rasterizer; | ||
| 305 | BufferBase buffer(rasterizer, c, WORD); | ||
| 306 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 307 | buffer.MarkRegionAsCpuModified(c, PAGE); | ||
| 308 | int num = 0; | ||
| 309 | buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 310 | buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 311 | buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 312 | REQUIRE(num == 0); | ||
| 313 | buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 314 | REQUIRE(num == 1); | ||
| 315 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 316 | REQUIRE(rasterizer.Count() == 0); | ||
| 317 | } | ||
| 318 | |||
| 319 | TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") { | ||
| 320 | RasterizerInterface rasterizer; | ||
| 321 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 322 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 323 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 324 | REQUIRE(rasterizer.Count() == 0); | ||
| 325 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 326 | REQUIRE(rasterizer.Count() == 1); | ||
| 327 | REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2)); | ||
| 328 | buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2); | ||
| 329 | buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2); | ||
| 330 | REQUIRE(rasterizer.Count() == 2); | ||
| 331 | } | ||
| 332 | |||
| 333 | TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") { | ||
| 334 | RasterizerInterface rasterizer; | ||
| 335 | BufferBase buffer(rasterizer, c, 0x310720); | ||
| 336 | buffer.UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 337 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 338 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 339 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 340 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 341 | } | ||
| 342 | |||
| 343 | TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") { | ||
| 344 | RasterizerInterface rasterizer; | ||
| 345 | BufferBase buffer(rasterizer, c, WORD); | ||
| 346 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 347 | buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 348 | buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 349 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 350 | static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3}; | ||
| 351 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 352 | REQUIRE(offset == offsets.at(i)); | ||
| 353 | REQUIRE(size == sizes.at(i)); | ||
| 354 | ++i; | ||
| 355 | }); | ||
| 356 | } | ||
| 357 | |||
| 358 | TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") { | ||
| 359 | RasterizerInterface rasterizer; | ||
| 360 | BufferBase buffer(rasterizer, c, 0x22000); | ||
| 361 | buffer.UnmarkRegionAsCpuModified(c, 0x22000); | ||
| 362 | REQUIRE(rasterizer.Count() == 0x22); | ||
| 363 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 364 | buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 365 | buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 366 | static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21}; | ||
| 367 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE}; | ||
| 368 | REQUIRE(offset == offsets.at(i)); | ||
| 369 | REQUIRE(size == sizes.at(i)); | ||
| 370 | ++i; | ||
| 371 | }); | ||
| 372 | } | ||
| 373 | |||
| 374 | TEST_CASE("BufferBase: Single page modified range", "[video_core]") { | ||
| 375 | RasterizerInterface rasterizer; | ||
| 376 | BufferBase buffer(rasterizer, c, PAGE); | ||
| 377 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 378 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 379 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("BufferBase: Two page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | BufferBase buffer(rasterizer, c, PAGE * 2); | ||
| 385 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 386 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 387 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2)); | ||
| 388 | buffer.UnmarkRegionAsCpuModified(c, PAGE); | ||
| 389 | REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); | ||
| 390 | } | ||
| 391 | |||
| 392 | TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") { | ||
| 393 | for (int offset = 0; offset < 4; ++offset) { | ||
| 394 | const VAddr address = c + WORD * offset; | ||
| 395 | RasterizerInterface rasterizer; | ||
| 396 | BufferBase buffer(rasterizer, address, WORD * 4); | ||
| 397 | REQUIRE(buffer.IsRegionCpuModified(address, PAGE)); | ||
| 398 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 399 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 400 | |||
| 401 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 402 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD)); | ||
| 403 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 404 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 405 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 406 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 407 | REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 408 | |||
| 409 | buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 410 | REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 411 | } | ||
| 412 | } | ||
| 413 | |||
| 414 | TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") { | ||
| 415 | RasterizerInterface rasterizer; | ||
| 416 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 417 | buffer.UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 418 | REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 419 | |||
| 420 | buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 421 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16)); | ||
| 422 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 423 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 424 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 425 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 426 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 427 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 428 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 429 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 430 | } | ||
| 431 | |||
| 432 | TEST_CASE("BufferBase: Out of bounds region query") { | ||
| 433 | RasterizerInterface rasterizer; | ||
| 434 | BufferBase buffer(rasterizer, c, WORD * 16); | ||
| 435 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE)); | ||
| 436 | REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE)); | ||
| 437 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE)); | ||
| 438 | REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64)); | ||
| 439 | REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64)); | ||
| 440 | } | ||
| 441 | |||
| 442 | TEST_CASE("BufferBase: Wrap word regions") { | ||
| 443 | RasterizerInterface rasterizer; | ||
| 444 | BufferBase buffer(rasterizer, c, WORD * 2); | ||
| 445 | buffer.UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 446 | buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 447 | REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2)); | ||
| 448 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 449 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 450 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 451 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 452 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 453 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 454 | |||
| 455 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 457 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 458 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 459 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 460 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 461 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 462 | } | ||
| 463 | |||
| 464 | TEST_CASE("BufferBase: Unaligned page region query") { | ||
| 465 | RasterizerInterface rasterizer; | ||
| 466 | BufferBase buffer(rasterizer, c, WORD); | ||
| 467 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 468 | buffer.MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 469 | REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); | ||
| 470 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 471 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); | ||
| 472 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); | ||
| 473 | } | ||
| 474 | |||
| 475 | TEST_CASE("BufferBase: Cached write") { | ||
| 476 | RasterizerInterface rasterizer; | ||
| 477 | BufferBase buffer(rasterizer, c, WORD); | ||
| 478 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 479 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 480 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | buffer.FlushCachedWrites(); | ||
| 482 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 483 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 484 | REQUIRE(rasterizer.Count() == 0); | ||
| 485 | } | ||
| 486 | |||
| 487 | TEST_CASE("BufferBase: Multiple cached write") { | ||
| 488 | RasterizerInterface rasterizer; | ||
| 489 | BufferBase buffer(rasterizer, c, WORD); | ||
| 490 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 491 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 492 | buffer.CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 493 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 494 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 495 | buffer.FlushCachedWrites(); | ||
| 496 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 497 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 498 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 499 | REQUIRE(rasterizer.Count() == 0); | ||
| 500 | } | ||
| 501 | |||
| 502 | TEST_CASE("BufferBase: Cached write unmarked") { | ||
| 503 | RasterizerInterface rasterizer; | ||
| 504 | BufferBase buffer(rasterizer, c, WORD); | ||
| 505 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 506 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 507 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 508 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | buffer.FlushCachedWrites(); | ||
| 510 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 511 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 512 | REQUIRE(rasterizer.Count() == 0); | ||
| 513 | } | ||
| 514 | |||
| 515 | TEST_CASE("BufferBase: Cached write iterated") { | ||
| 516 | RasterizerInterface rasterizer; | ||
| 517 | BufferBase buffer(rasterizer, c, WORD); | ||
| 518 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 519 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 520 | int num = 0; | ||
| 521 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 522 | REQUIRE(num == 0); | ||
| 523 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | buffer.FlushCachedWrites(); | ||
| 525 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 526 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 527 | REQUIRE(rasterizer.Count() == 0); | ||
| 528 | } | ||
| 529 | |||
| 530 | TEST_CASE("BufferBase: Cached write downloads") { | ||
| 531 | RasterizerInterface rasterizer; | ||
| 532 | BufferBase buffer(rasterizer, c, WORD); | ||
| 533 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 534 | REQUIRE(rasterizer.Count() == 64); | ||
| 535 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 536 | REQUIRE(rasterizer.Count() == 63); | ||
| 537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 538 | int num = 0; | ||
| 539 | buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | buffer.FlushCachedWrites(); | ||
| 545 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } | ||
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp new file mode 100644 index 000000000..3981907a2 --- /dev/null +++ b/src/tests/video_core/memory_tracker.cpp | |||
| @@ -0,0 +1,549 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include <memory> | ||
| 5 | #include <stdexcept> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include <catch2/catch_test_macros.hpp> | ||
| 9 | |||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | using Range = std::pair<u64, u64>; | ||
| 16 | |||
| 17 | constexpr u64 PAGE = 4096; | ||
| 18 | constexpr u64 WORD = 4096 * 64; | ||
| 19 | constexpr u64 HIGH_PAGE_BITS = 22; | ||
| 20 | constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS; | ||
| 21 | |||
| 22 | constexpr VAddr c = 16 * HIGH_PAGE_SIZE; | ||
| 23 | |||
| 24 | class RasterizerInterface { | ||
| 25 | public: | ||
| 26 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 27 | const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS}; | ||
| 28 | const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >> | ||
| 29 | Core::Memory::YUZU_PAGEBITS}; | ||
| 30 | for (u64 page = page_start; page < page_end; ++page) { | ||
| 31 | int& value = page_table[page]; | ||
| 32 | value += delta; | ||
| 33 | if (value < 0) { | ||
| 34 | throw std::logic_error{"negative page"}; | ||
| 35 | } | ||
| 36 | if (value == 0) { | ||
| 37 | page_table.erase(page); | ||
| 38 | } | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] int Count(VAddr addr) const noexcept { | ||
| 43 | const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS); | ||
| 44 | return it == page_table.end() ? 0 : it->second; | ||
| 45 | } | ||
| 46 | |||
| 47 | [[nodiscard]] unsigned Count() const noexcept { | ||
| 48 | unsigned count = 0; | ||
| 49 | for (const auto& [index, value] : page_table) { | ||
| 50 | count += value; | ||
| 51 | } | ||
| 52 | return count; | ||
| 53 | } | ||
| 54 | |||
| 55 | private: | ||
| 56 | std::unordered_map<u64, int> page_table; | ||
| 57 | }; | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>; | ||
| 61 | |||
| 62 | TEST_CASE("MemoryTracker: Small region", "[video_core]") { | ||
| 63 | RasterizerInterface rasterizer; | ||
| 64 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 65 | REQUIRE(rasterizer.Count() == 0); | ||
| 66 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 67 | REQUIRE(rasterizer.Count() == WORD / PAGE); | ||
| 68 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0}); | ||
| 69 | |||
| 70 | memory_track->MarkRegionAsCpuModified(c + PAGE, 1); | ||
| 71 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2}); | ||
| 72 | } | ||
| 73 | |||
| 74 | TEST_CASE("MemoryTracker: Large region", "[video_core]") { | ||
| 75 | RasterizerInterface rasterizer; | ||
| 76 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 77 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 78 | memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4); | ||
| 79 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) == | ||
| 80 | Range{c + PAGE, c + WORD + PAGE * 2}); | ||
| 81 | REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == | ||
| 82 | Range{c + PAGE * 2, c + PAGE * 8}); | ||
| 83 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE}); | ||
| 84 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) == | ||
| 85 | Range{c + WORD * 4, c + WORD * 4 + PAGE}); | ||
| 86 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == | ||
| 87 | Range{c + WORD * 3 + PAGE * 63, c + WORD * 4}); | ||
| 88 | |||
| 89 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); | ||
| 90 | memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 91 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 92 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9}); | ||
| 93 | |||
| 94 | memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); | ||
| 95 | REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) == | ||
| 96 | Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7}); | ||
| 97 | |||
| 98 | memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); | ||
| 99 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32}); | ||
| 100 | |||
| 101 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); | ||
| 102 | memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); | ||
| 103 | |||
| 104 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 105 | REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); | ||
| 106 | } | ||
| 107 | |||
| 108 | TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") { | ||
| 109 | RasterizerInterface rasterizer; | ||
| 110 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 111 | REQUIRE(rasterizer.Count() == 0); | ||
| 112 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 113 | REQUIRE(rasterizer.Count() == 1); | ||
| 114 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 115 | REQUIRE(rasterizer.Count() == 0); | ||
| 116 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 117 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 118 | REQUIRE(rasterizer.Count() == 2); | ||
| 119 | memory_track->MarkRegionAsCpuModified(c, PAGE * 2); | ||
| 120 | REQUIRE(rasterizer.Count() == 0); | ||
| 121 | } | ||
| 122 | |||
| 123 | TEST_CASE("MemoryTracker: Basic range", "[video_core]") { | ||
| 124 | RasterizerInterface rasterizer; | ||
| 125 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 126 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 127 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 128 | int num = 0; | ||
| 129 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 130 | REQUIRE(offset == c); | ||
| 131 | REQUIRE(size == PAGE); | ||
| 132 | ++num; | ||
| 133 | }); | ||
| 134 | REQUIRE(num == 1U); | ||
| 135 | } | ||
| 136 | |||
| 137 | TEST_CASE("MemoryTracker: Border upload", "[video_core]") { | ||
| 138 | RasterizerInterface rasterizer; | ||
| 139 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 140 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 141 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 142 | memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { | ||
| 143 | REQUIRE(offset == c + WORD - PAGE); | ||
| 144 | REQUIRE(size == PAGE * 2); | ||
| 145 | }); | ||
| 146 | } | ||
| 147 | |||
| 148 | TEST_CASE("MemoryTracker: Border upload range", "[video_core]") { | ||
| 149 | RasterizerInterface rasterizer; | ||
| 150 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 151 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 152 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 153 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { | ||
| 154 | REQUIRE(offset == c + WORD - PAGE); | ||
| 155 | REQUIRE(size == PAGE * 2); | ||
| 156 | }); | ||
| 157 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 158 | memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { | ||
| 159 | REQUIRE(offset == c + WORD - PAGE); | ||
| 160 | REQUIRE(size == PAGE); | ||
| 161 | }); | ||
| 162 | memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { | ||
| 163 | REQUIRE(offset == c + WORD); | ||
| 164 | REQUIRE(size == PAGE); | ||
| 165 | }); | ||
| 166 | } | ||
| 167 | |||
| 168 | TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") { | ||
| 169 | RasterizerInterface rasterizer; | ||
| 170 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 171 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2); | ||
| 172 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 173 | memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { | ||
| 174 | REQUIRE(offset == c + WORD - PAGE); | ||
| 175 | REQUIRE(size == PAGE * 2); | ||
| 176 | }); | ||
| 177 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 178 | memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { | ||
| 179 | REQUIRE(offset == c + WORD - PAGE); | ||
| 180 | REQUIRE(size == PAGE); | ||
| 181 | }); | ||
| 182 | memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { | ||
| 183 | REQUIRE(offset == c + WORD); | ||
| 184 | REQUIRE(size == PAGE); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") { | ||
| 189 | RasterizerInterface rasterizer; | ||
| 190 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 191 | int num = 0; | ||
| 192 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 193 | REQUIRE(offset == c); | ||
| 194 | REQUIRE(size == WORD); | ||
| 195 | ++num; | ||
| 196 | }); | ||
| 197 | REQUIRE(num == 1); | ||
| 198 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { | ||
| 199 | REQUIRE(offset == c + WORD); | ||
| 200 | REQUIRE(size == WORD); | ||
| 201 | ++num; | ||
| 202 | }); | ||
| 203 | REQUIRE(num == 2); | ||
| 204 | memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { | ||
| 205 | REQUIRE(offset == c + WORD * 2); | ||
| 206 | REQUIRE(size == PAGE * 0x1d); | ||
| 207 | ++num; | ||
| 208 | }); | ||
| 209 | REQUIRE(num == 3); | ||
| 210 | } | ||
| 211 | |||
| 212 | TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") { | ||
| 213 | RasterizerInterface rasterizer; | ||
| 214 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 215 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 216 | int num = 0; | ||
| 217 | memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE); | ||
| 218 | memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE); | ||
| 219 | memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 220 | REQUIRE(offset == c + PAGE * 2); | ||
| 221 | REQUIRE(size == PAGE); | ||
| 222 | ++num; | ||
| 223 | }); | ||
| 224 | REQUIRE(num == 1); | ||
| 225 | memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { | ||
| 226 | REQUIRE(offset == c + PAGE * 9); | ||
| 227 | REQUIRE(size == PAGE); | ||
| 228 | ++num; | ||
| 229 | }); | ||
| 230 | REQUIRE(num == 2); | ||
| 231 | } | ||
| 232 | |||
| 233 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right") { | ||
| 234 | RasterizerInterface rasterizer; | ||
| 235 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 236 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 9); | ||
| 237 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 238 | int num = 0; | ||
| 239 | memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { | ||
| 240 | REQUIRE(offset == c + PAGE * 13); | ||
| 241 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 242 | ++num; | ||
| 243 | }); | ||
| 244 | REQUIRE(num == 1); | ||
| 245 | memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { | ||
| 246 | REQUIRE(offset == c + WORD * 7 + PAGE * 10); | ||
| 247 | REQUIRE(size == PAGE * 3); | ||
| 248 | ++num; | ||
| 249 | }); | ||
| 250 | REQUIRE(num == 2); | ||
| 251 | } | ||
| 252 | |||
| 253 | TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") { | ||
| 254 | RasterizerInterface rasterizer; | ||
| 255 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 256 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 257 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); | ||
| 258 | int num = 0; | ||
| 259 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { | ||
| 260 | REQUIRE(offset == c + PAGE * 16); | ||
| 261 | REQUIRE(size == WORD * 7 - PAGE * 3); | ||
| 262 | ++num; | ||
| 263 | }); | ||
| 264 | REQUIRE(num == 1); | ||
| 265 | memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { | ||
| 266 | REQUIRE(offset == c + PAGE * 13); | ||
| 267 | REQUIRE(size == PAGE * 3); | ||
| 268 | ++num; | ||
| 269 | }); | ||
| 270 | REQUIRE(num == 2); | ||
| 271 | } | ||
| 272 | |||
| 273 | TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") { | ||
| 274 | RasterizerInterface rasterizer; | ||
| 275 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 276 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 8); | ||
| 277 | memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140); | ||
| 278 | int num = 0; | ||
| 279 | memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { | ||
| 280 | REQUIRE(offset == c + PAGE * 16); | ||
| 281 | REQUIRE(size == WORD); | ||
| 282 | ++num; | ||
| 283 | }); | ||
| 284 | REQUIRE(num == 1); | ||
| 285 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { | ||
| 286 | REQUIRE(offset == c + PAGE * 13); | ||
| 287 | REQUIRE(size == PAGE * 3); | ||
| 288 | ++num; | ||
| 289 | }); | ||
| 290 | REQUIRE(num == 2); | ||
| 291 | memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { | ||
| 292 | REQUIRE(offset == c + WORD + PAGE * 16); | ||
| 293 | REQUIRE(size == PAGE * 73); | ||
| 294 | ++num; | ||
| 295 | }); | ||
| 296 | REQUIRE(num == 3); | ||
| 297 | } | ||
| 298 | |||
| 299 | TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") { | ||
| 300 | RasterizerInterface rasterizer; | ||
| 301 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 302 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048); | ||
| 303 | memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); | ||
| 304 | memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { | ||
| 305 | REQUIRE(offset == c + WORD - PAGE); | ||
| 306 | REQUIRE(size == PAGE * 2); | ||
| 307 | }); | ||
| 308 | } | ||
| 309 | |||
| 310 | TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") { | ||
| 311 | RasterizerInterface rasterizer; | ||
| 312 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 313 | memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD); | ||
| 314 | memory_track->MarkRegionAsCpuModified(c, PAGE); | ||
| 315 | REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE); | ||
| 316 | int num = 0; | ||
| 317 | memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 318 | memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 319 | memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); | ||
| 320 | REQUIRE(num == 0); | ||
| 321 | memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); | ||
| 322 | REQUIRE(num == 1); | ||
| 323 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 324 | REQUIRE(rasterizer.Count() == 2 * WORD / PAGE); | ||
| 325 | } | ||
| 326 | |||
| 327 | TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") { | ||
| 328 | RasterizerInterface rasterizer; | ||
| 329 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 330 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); | ||
| 331 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); | ||
| 332 | REQUIRE(rasterizer.Count() == 2); | ||
| 333 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); | ||
| 334 | REQUIRE(rasterizer.Count() == 3); | ||
| 335 | REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2)); | ||
| 336 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2); | ||
| 337 | memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2); | ||
| 338 | REQUIRE(rasterizer.Count() == 7); | ||
| 339 | } | ||
| 340 | |||
| 341 | TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") { | ||
| 342 | RasterizerInterface rasterizer; | ||
| 343 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 344 | memory_track->UnmarkRegionAsCpuModified(c, 0x310720); | ||
| 345 | REQUIRE(rasterizer.Count(c) == 1); | ||
| 346 | REQUIRE(rasterizer.Count(c + PAGE) == 1); | ||
| 347 | REQUIRE(rasterizer.Count(c + WORD) == 1); | ||
| 348 | REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); | ||
| 349 | } | ||
| 350 | |||
| 351 | TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") { | ||
| 352 | RasterizerInterface rasterizer; | ||
| 353 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 354 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 355 | memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE); | ||
| 356 | memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); | ||
| 357 | memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { | ||
| 358 | static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3}; | ||
| 359 | static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4}; | ||
| 360 | REQUIRE(offset == offsets.at(i)); | ||
| 361 | REQUIRE(size == sizes.at(i)); | ||
| 362 | ++i; | ||
| 363 | }); | ||
| 364 | } | ||
| 365 | |||
| 366 | TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") { | ||
| 367 | RasterizerInterface rasterizer; | ||
| 368 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 369 | memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23); | ||
| 370 | REQUIRE(rasterizer.Count() == 0x23); | ||
| 371 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); | ||
| 372 | memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); | ||
| 373 | memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable { | ||
| 374 | static constexpr std::array<u64, 3> offsets{c + PAGE * 0x1B, c + PAGE * 0x21}; | ||
| 375 | static constexpr std::array<u64, 3> sizes{PAGE, PAGE}; | ||
| 376 | REQUIRE(offset == offsets.at(i)); | ||
| 377 | REQUIRE(size == sizes.at(i)); | ||
| 378 | ++i; | ||
| 379 | }); | ||
| 380 | } | ||
| 381 | |||
| 382 | TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") { | ||
| 383 | RasterizerInterface rasterizer; | ||
| 384 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 385 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 386 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 387 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 388 | } | ||
| 389 | |||
| 390 | TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") { | ||
| 391 | RasterizerInterface rasterizer; | ||
| 392 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 393 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 394 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 395 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2)); | ||
| 396 | memory_track->UnmarkRegionAsCpuModified(c, PAGE); | ||
| 397 | REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 398 | } | ||
| 399 | |||
| 400 | TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") { | ||
| 401 | for (int offset = 0; offset < 4; ++offset) { | ||
| 402 | const VAddr address = c + WORD * offset; | ||
| 403 | RasterizerInterface rasterizer; | ||
| 404 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 405 | REQUIRE(memory_track->IsRegionCpuModified(address, PAGE)); | ||
| 406 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE)); | ||
| 407 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE)); | ||
| 408 | |||
| 409 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); | ||
| 410 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD)); | ||
| 411 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE)); | ||
| 412 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE)); | ||
| 413 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE)); | ||
| 414 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); | ||
| 415 | REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 416 | |||
| 417 | memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); | ||
| 418 | REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); | ||
| 419 | } | ||
| 420 | } | ||
| 421 | |||
| 422 | TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") { | ||
| 423 | RasterizerInterface rasterizer; | ||
| 424 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 425 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 16); | ||
| 426 | REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 427 | |||
| 428 | memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); | ||
| 429 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16)); | ||
| 430 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2)); | ||
| 431 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2)); | ||
| 432 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2)); | ||
| 433 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); | ||
| 434 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); | ||
| 435 | REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); | ||
| 436 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); | ||
| 437 | REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); | ||
| 438 | } | ||
| 439 | |||
| 440 | TEST_CASE("MemoryTracker: Wrap word regions") { | ||
| 441 | RasterizerInterface rasterizer; | ||
| 442 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 443 | memory_track->UnmarkRegionAsCpuModified(c, WORD * 32); | ||
| 444 | memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); | ||
| 445 | REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2)); | ||
| 446 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE)); | ||
| 447 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE)); | ||
| 448 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE)); | ||
| 449 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); | ||
| 450 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); | ||
| 451 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); | ||
| 452 | |||
| 453 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 454 | memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE); | ||
| 455 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16)); | ||
| 456 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE)); | ||
| 457 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE)); | ||
| 458 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); | ||
| 459 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16)); | ||
| 460 | } | ||
| 461 | |||
| 462 | TEST_CASE("MemoryTracker: Unaligned page region query") { | ||
| 463 | RasterizerInterface rasterizer; | ||
| 464 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 465 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 466 | memory_track->MarkRegionAsCpuModified(c + 4000, 1000); | ||
| 467 | REQUIRE(memory_track->IsRegionCpuModified(c, PAGE)); | ||
| 468 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 469 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000)); | ||
| 470 | REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1)); | ||
| 471 | } | ||
| 472 | |||
| 473 | TEST_CASE("MemoryTracker: Cached write") { | ||
| 474 | RasterizerInterface rasterizer; | ||
| 475 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 476 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 477 | memory_track->CachedCpuWrite(c + PAGE, c + PAGE); | ||
| 478 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 479 | memory_track->FlushCachedWrites(); | ||
| 480 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 482 | REQUIRE(rasterizer.Count() == 0); | ||
| 483 | } | ||
| 484 | |||
| 485 | TEST_CASE("MemoryTracker: Multiple cached write") { | ||
| 486 | RasterizerInterface rasterizer; | ||
| 487 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 488 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 489 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 490 | memory_track->CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 491 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 492 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 493 | memory_track->FlushCachedWrites(); | ||
| 494 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 495 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 496 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 497 | REQUIRE(rasterizer.Count() == 0); | ||
| 498 | } | ||
| 499 | |||
| 500 | TEST_CASE("MemoryTracker: Cached write unmarked") { | ||
| 501 | RasterizerInterface rasterizer; | ||
| 502 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 503 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 504 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 505 | memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 506 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 507 | memory_track->FlushCachedWrites(); | ||
| 508 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 510 | REQUIRE(rasterizer.Count() == 0); | ||
| 511 | } | ||
| 512 | |||
| 513 | TEST_CASE("MemoryTracker: Cached write iterated") { | ||
| 514 | RasterizerInterface rasterizer; | ||
| 515 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 516 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 517 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 518 | int num = 0; | ||
| 519 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 520 | REQUIRE(num == 0); | ||
| 521 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 522 | memory_track->FlushCachedWrites(); | ||
| 523 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 525 | REQUIRE(rasterizer.Count() == 0); | ||
| 526 | } | ||
| 527 | |||
| 528 | TEST_CASE("MemoryTracker: Cached write downloads") { | ||
| 529 | RasterizerInterface rasterizer; | ||
| 530 | std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer)); | ||
| 531 | memory_track->UnmarkRegionAsCpuModified(c, WORD); | ||
| 532 | REQUIRE(rasterizer.Count() == 64); | ||
| 533 | memory_track->CachedCpuWrite(c + PAGE, PAGE); | ||
| 534 | REQUIRE(rasterizer.Count() == 63); | ||
| 535 | memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 536 | int num = 0; | ||
| 537 | memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 538 | REQUIRE(num == 1); | ||
| 539 | num = 0; | ||
| 540 | memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | memory_track->FlushCachedWrites(); | ||
| 545 | REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | memory_track->MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } \ No newline at end of file | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e904573d7..a0009a36f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -11,8 +11,11 @@ endif() | |||
| 11 | 11 | ||
| 12 | add_library(video_core STATIC | 12 | add_library(video_core STATIC |
| 13 | buffer_cache/buffer_base.h | 13 | buffer_cache/buffer_base.h |
| 14 | buffer_cache/buffer_cache_base.h | ||
| 14 | buffer_cache/buffer_cache.cpp | 15 | buffer_cache/buffer_cache.cpp |
| 15 | buffer_cache/buffer_cache.h | 16 | buffer_cache/buffer_cache.h |
| 17 | buffer_cache/memory_tracker_base.h | ||
| 18 | buffer_cache/word_manager.h | ||
| 16 | cache_types.h | 19 | cache_types.h |
| 17 | cdma_pusher.cpp | 20 | cdma_pusher.cpp |
| 18 | cdma_pusher.h | 21 | cdma_pusher.h |
| @@ -104,6 +107,7 @@ add_library(video_core STATIC | |||
| 104 | renderer_null/renderer_null.h | 107 | renderer_null/renderer_null.h |
| 105 | renderer_opengl/blit_image.cpp | 108 | renderer_opengl/blit_image.cpp |
| 106 | renderer_opengl/blit_image.h | 109 | renderer_opengl/blit_image.h |
| 110 | renderer_opengl/gl_buffer_cache_base.cpp | ||
| 107 | renderer_opengl/gl_buffer_cache.cpp | 111 | renderer_opengl/gl_buffer_cache.cpp |
| 108 | renderer_opengl/gl_buffer_cache.h | 112 | renderer_opengl/gl_buffer_cache.h |
| 109 | renderer_opengl/gl_compute_pipeline.cpp | 113 | renderer_opengl/gl_compute_pipeline.cpp |
| @@ -154,6 +158,7 @@ add_library(video_core STATIC | |||
| 154 | renderer_vulkan/renderer_vulkan.cpp | 158 | renderer_vulkan/renderer_vulkan.cpp |
| 155 | renderer_vulkan/vk_blit_screen.cpp | 159 | renderer_vulkan/vk_blit_screen.cpp |
| 156 | renderer_vulkan/vk_blit_screen.h | 160 | renderer_vulkan/vk_blit_screen.h |
| 161 | renderer_vulkan/vk_buffer_cache_base.cpp | ||
| 157 | renderer_vulkan/vk_buffer_cache.cpp | 162 | renderer_vulkan/vk_buffer_cache.cpp |
| 158 | renderer_vulkan/vk_buffer_cache.h | 163 | renderer_vulkan/vk_buffer_cache.h |
| 159 | renderer_vulkan/vk_command_pool.cpp | 164 | renderer_vulkan/vk_command_pool.cpp |
| @@ -174,6 +179,8 @@ add_library(video_core STATIC | |||
| 174 | renderer_vulkan/vk_master_semaphore.h | 179 | renderer_vulkan/vk_master_semaphore.h |
| 175 | renderer_vulkan/vk_pipeline_cache.cpp | 180 | renderer_vulkan/vk_pipeline_cache.cpp |
| 176 | renderer_vulkan/vk_pipeline_cache.h | 181 | renderer_vulkan/vk_pipeline_cache.h |
| 182 | renderer_vulkan/vk_present_manager.cpp | ||
| 183 | renderer_vulkan/vk_present_manager.h | ||
| 177 | renderer_vulkan/vk_query_cache.cpp | 184 | renderer_vulkan/vk_query_cache.cpp |
| 178 | renderer_vulkan/vk_query_cache.h | 185 | renderer_vulkan/vk_query_cache.h |
| 179 | renderer_vulkan/vk_rasterizer.cpp | 186 | renderer_vulkan/vk_rasterizer.cpp |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 1b4d63616..9cbd95c4b 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| @@ -11,9 +11,7 @@ | |||
| 11 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 12 | #include "common/common_funcs.h" | 12 | #include "common/common_funcs.h" |
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "common/div_ceil.h" | 14 | #include "video_core/buffer_cache/word_manager.h" |
| 15 | #include "common/settings.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | 15 | ||
| 18 | namespace VideoCommon { | 16 | namespace VideoCommon { |
| 19 | 17 | ||
| @@ -36,116 +34,12 @@ struct NullBufferParams {}; | |||
| 36 | */ | 34 | */ |
| 37 | template <class RasterizerInterface> | 35 | template <class RasterizerInterface> |
| 38 | class BufferBase { | 36 | class BufferBase { |
| 39 | static constexpr u64 PAGES_PER_WORD = 64; | ||
| 40 | static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE; | ||
| 41 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | ||
| 42 | |||
| 43 | /// Vector tracking modified pages tightly packed with small vector optimization | ||
| 44 | union WordsArray { | ||
| 45 | /// Returns the pointer to the words state | ||
| 46 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 47 | return is_short ? &stack : heap; | ||
| 48 | } | ||
| 49 | |||
| 50 | /// Returns the pointer to the words state | ||
| 51 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 52 | return is_short ? &stack : heap; | ||
| 53 | } | ||
| 54 | |||
| 55 | u64 stack = 0; ///< Small buffers storage | ||
| 56 | u64* heap; ///< Not-small buffers pointer to the storage | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct Words { | ||
| 60 | explicit Words() = default; | ||
| 61 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { | ||
| 62 | if (IsShort()) { | ||
| 63 | cpu.stack = ~u64{0}; | ||
| 64 | gpu.stack = 0; | ||
| 65 | cached_cpu.stack = 0; | ||
| 66 | untracked.stack = ~u64{0}; | ||
| 67 | } else { | ||
| 68 | // Share allocation between CPU and GPU pages and set their default values | ||
| 69 | const size_t num_words = NumWords(); | ||
| 70 | u64* const alloc = new u64[num_words * 4]; | ||
| 71 | cpu.heap = alloc; | ||
| 72 | gpu.heap = alloc + num_words; | ||
| 73 | cached_cpu.heap = alloc + num_words * 2; | ||
| 74 | untracked.heap = alloc + num_words * 3; | ||
| 75 | std::fill_n(cpu.heap, num_words, ~u64{0}); | ||
| 76 | std::fill_n(gpu.heap, num_words, 0); | ||
| 77 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 78 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 79 | } | ||
| 80 | // Clean up tailing bits | ||
| 81 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; | ||
| 82 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); | ||
| 83 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | ||
| 84 | const u64 last_word = (~u64{0} << shift) >> shift; | ||
| 85 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 86 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 87 | } | ||
| 88 | |||
| 89 | ~Words() { | ||
| 90 | Release(); | ||
| 91 | } | ||
| 92 | |||
| 93 | Words& operator=(Words&& rhs) noexcept { | ||
| 94 | Release(); | ||
| 95 | size_bytes = rhs.size_bytes; | ||
| 96 | cpu = rhs.cpu; | ||
| 97 | gpu = rhs.gpu; | ||
| 98 | cached_cpu = rhs.cached_cpu; | ||
| 99 | untracked = rhs.untracked; | ||
| 100 | rhs.cpu.heap = nullptr; | ||
| 101 | return *this; | ||
| 102 | } | ||
| 103 | |||
| 104 | Words(Words&& rhs) noexcept | ||
| 105 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, | ||
| 106 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 107 | rhs.cpu.heap = nullptr; | ||
| 108 | } | ||
| 109 | |||
| 110 | Words& operator=(const Words&) = delete; | ||
| 111 | Words(const Words&) = delete; | ||
| 112 | |||
| 113 | /// Returns true when the buffer fits in the small vector optimization | ||
| 114 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 115 | return size_bytes <= BYTES_PER_WORD; | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Returns the number of words of the buffer | ||
| 119 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 120 | return Common::DivCeil(size_bytes, BYTES_PER_WORD); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Release buffer resources | ||
| 124 | void Release() { | ||
| 125 | if (!IsShort()) { | ||
| 126 | // CPU written words is the base for the heap allocation | ||
| 127 | delete[] cpu.heap; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | u64 size_bytes = 0; | ||
| 132 | WordsArray cpu; | ||
| 133 | WordsArray gpu; | ||
| 134 | WordsArray cached_cpu; | ||
| 135 | WordsArray untracked; | ||
| 136 | }; | ||
| 137 | |||
| 138 | enum class Type { | ||
| 139 | CPU, | ||
| 140 | GPU, | ||
| 141 | CachedCPU, | ||
| 142 | Untracked, | ||
| 143 | }; | ||
| 144 | |||
| 145 | public: | 37 | public: |
| 146 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) | 38 | static constexpr u64 BASE_PAGE_BITS = 16; |
| 147 | : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, | 39 | static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; |
| 148 | words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} | 40 | |
| 41 | explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_) | ||
| 42 | : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} | ||
| 149 | 43 | ||
| 150 | explicit BufferBase(NullBufferParams) {} | 44 | explicit BufferBase(NullBufferParams) {} |
| 151 | 45 | ||
| @@ -155,100 +49,6 @@ public: | |||
| 155 | BufferBase& operator=(BufferBase&&) = default; | 49 | BufferBase& operator=(BufferBase&&) = default; |
| 156 | BufferBase(BufferBase&&) = default; | 50 | BufferBase(BufferBase&&) = default; |
| 157 | 51 | ||
| 158 | /// Returns the inclusive CPU modified range in a begin end pair | ||
| 159 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | ||
| 160 | u64 query_size) const noexcept { | ||
| 161 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 162 | return ModifiedRegion<Type::CPU>(offset, query_size); | ||
| 163 | } | ||
| 164 | |||
| 165 | /// Returns the inclusive GPU modified range in a begin end pair | ||
| 166 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | ||
| 167 | u64 query_size) const noexcept { | ||
| 168 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 169 | return ModifiedRegion<Type::GPU>(offset, query_size); | ||
| 170 | } | ||
| 171 | |||
| 172 | /// Returns true if a region has been modified from the CPU | ||
| 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 174 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 175 | return IsRegionModified<Type::CPU>(offset, query_size); | ||
| 176 | } | ||
| 177 | |||
| 178 | /// Returns true if a region has been modified from the GPU | ||
| 179 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | ||
| 180 | const u64 offset = query_cpu_addr - cpu_addr; | ||
| 181 | return IsRegionModified<Type::GPU>(offset, query_size); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// Mark region as CPU modified, notifying the rasterizer about this change | ||
| 185 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 186 | ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Unmark region as CPU modified, notifying the rasterizer about this change | ||
| 190 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | ||
| 191 | ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); | ||
| 192 | } | ||
| 193 | |||
| 194 | /// Mark region as modified from the host GPU | ||
| 195 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 196 | ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); | ||
| 197 | } | ||
| 198 | |||
| 199 | /// Unmark region as modified from the host GPU | ||
| 200 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | ||
| 201 | ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Mark region as modified from the CPU | ||
| 205 | /// but don't mark it as modified until FlusHCachedWrites is called. | ||
| 206 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||
| 207 | flags |= BufferFlagBits::CachedWrites; | ||
| 208 | ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Flushes cached CPU writes, and notify the rasterizer about the deltas | ||
| 212 | void FlushCachedWrites() noexcept { | ||
| 213 | flags &= ~BufferFlagBits::CachedWrites; | ||
| 214 | const u64 num_words = NumWords(); | ||
| 215 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 216 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 217 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 218 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 219 | const u64 cached_bits = cached_words[word_index]; | ||
| 220 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 221 | untracked_words[word_index] |= cached_bits; | ||
| 222 | cpu_words[word_index] |= cached_bits; | ||
| 223 | if (!Settings::values.use_pessimistic_flushes) { | ||
| 224 | cached_words[word_index] = 0; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | } | ||
| 228 | |||
| 229 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | ||
| 230 | template <typename Func> | ||
| 231 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 232 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func); | ||
| 233 | } | ||
| 234 | |||
| 235 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 236 | template <typename Func> | ||
| 237 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) { | ||
| 238 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func); | ||
| 239 | } | ||
| 240 | |||
| 241 | template <typename Func> | ||
| 242 | void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) { | ||
| 243 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func); | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | ||
| 247 | template <typename Func> | ||
| 248 | void ForEachDownloadRange(Func&& func) { | ||
| 249 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func); | ||
| 250 | } | ||
| 251 | |||
| 252 | /// Mark buffer as picked | 52 | /// Mark buffer as picked |
| 253 | void Pick() noexcept { | 53 | void Pick() noexcept { |
| 254 | flags |= BufferFlagBits::Picked; | 54 | flags |= BufferFlagBits::Picked; |
| @@ -295,11 +95,6 @@ public: | |||
| 295 | return static_cast<u32>(other_cpu_addr - cpu_addr); | 95 | return static_cast<u32>(other_cpu_addr - cpu_addr); |
| 296 | } | 96 | } |
| 297 | 97 | ||
| 298 | /// Returns the size in bytes of the buffer | ||
| 299 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 300 | return words.size_bytes; | ||
| 301 | } | ||
| 302 | |||
| 303 | size_t getLRUID() const noexcept { | 98 | size_t getLRUID() const noexcept { |
| 304 | return lru_id; | 99 | return lru_id; |
| 305 | } | 100 | } |
| @@ -308,305 +103,16 @@ public: | |||
| 308 | lru_id = lru_id_; | 103 | lru_id = lru_id_; |
| 309 | } | 104 | } |
| 310 | 105 | ||
| 311 | private: | 106 | size_t SizeBytes() const { |
| 312 | template <Type type> | 107 | return size_bytes; |
| 313 | u64* Array() noexcept { | ||
| 314 | if constexpr (type == Type::CPU) { | ||
| 315 | return words.cpu.Pointer(IsShort()); | ||
| 316 | } else if constexpr (type == Type::GPU) { | ||
| 317 | return words.gpu.Pointer(IsShort()); | ||
| 318 | } else if constexpr (type == Type::CachedCPU) { | ||
| 319 | return words.cached_cpu.Pointer(IsShort()); | ||
| 320 | } else if constexpr (type == Type::Untracked) { | ||
| 321 | return words.untracked.Pointer(IsShort()); | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | template <Type type> | ||
| 326 | const u64* Array() const noexcept { | ||
| 327 | if constexpr (type == Type::CPU) { | ||
| 328 | return words.cpu.Pointer(IsShort()); | ||
| 329 | } else if constexpr (type == Type::GPU) { | ||
| 330 | return words.gpu.Pointer(IsShort()); | ||
| 331 | } else if constexpr (type == Type::CachedCPU) { | ||
| 332 | return words.cached_cpu.Pointer(IsShort()); | ||
| 333 | } else if constexpr (type == Type::Untracked) { | ||
| 334 | return words.untracked.Pointer(IsShort()); | ||
| 335 | } | ||
| 336 | } | ||
| 337 | |||
| 338 | /** | ||
| 339 | * Change the state of a range of pages | ||
| 340 | * | ||
| 341 | * @param dirty_addr Base address to mark or unmark as modified | ||
| 342 | * @param size Size in bytes to mark or unmark as modified | ||
| 343 | */ | ||
| 344 | template <Type type, bool enable> | ||
| 345 | void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { | ||
| 346 | const s64 difference = dirty_addr - cpu_addr; | ||
| 347 | const u64 offset = std::max<s64>(difference, 0); | ||
| 348 | size += std::min<s64>(difference, 0); | ||
| 349 | if (offset >= SizeBytes() || size < 0) { | ||
| 350 | return; | ||
| 351 | } | ||
| 352 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 353 | u64* const state_words = Array<type>(); | ||
| 354 | const u64 offset_end = std::min(offset + size, SizeBytes()); | ||
| 355 | const u64 begin_page_index = offset / BYTES_PER_PAGE; | ||
| 356 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | ||
| 357 | const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE); | ||
| 358 | const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD); | ||
| 359 | u64 page_index = begin_page_index % PAGES_PER_WORD; | ||
| 360 | u64 word_index = begin_word_index; | ||
| 361 | while (word_index < end_word_index) { | ||
| 362 | const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD; | ||
| 363 | const u64 left_offset = | ||
| 364 | std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD; | ||
| 365 | const u64 right_offset = page_index; | ||
| 366 | u64 bits = ~u64{0}; | ||
| 367 | bits = (bits >> right_offset) << right_offset; | ||
| 368 | bits = (bits << left_offset) >> left_offset; | ||
| 369 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 370 | NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); | ||
| 371 | } | ||
| 372 | if constexpr (enable) { | ||
| 373 | state_words[word_index] |= bits; | ||
| 374 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 375 | untracked_words[word_index] |= bits; | ||
| 376 | } | ||
| 377 | } else { | ||
| 378 | state_words[word_index] &= ~bits; | ||
| 379 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 380 | untracked_words[word_index] &= ~bits; | ||
| 381 | } | ||
| 382 | } | ||
| 383 | page_index = 0; | ||
| 384 | ++word_index; | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | /** | ||
| 389 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 390 | * | ||
| 391 | * @param word_index Index to the word to notify to the rasterizer | ||
| 392 | * @param current_bits Current state of the word | ||
| 393 | * @param new_bits New state of the word | ||
| 394 | * | ||
| 395 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 396 | */ | ||
| 397 | template <bool add_to_rasterizer> | ||
| 398 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 399 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 400 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 401 | while (changed_bits != 0) { | ||
| 402 | const int empty_bits = std::countr_zero(changed_bits); | ||
| 403 | addr += empty_bits * BYTES_PER_PAGE; | ||
| 404 | changed_bits >>= empty_bits; | ||
| 405 | |||
| 406 | const u32 continuous_bits = std::countr_one(changed_bits); | ||
| 407 | const u64 size = continuous_bits * BYTES_PER_PAGE; | ||
| 408 | const VAddr begin_addr = addr; | ||
| 409 | addr += size; | ||
| 410 | changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0; | ||
| 411 | rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1); | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | /** | ||
| 416 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||
| 417 | * needed. Call the given function on each turned off range. | ||
| 418 | * | ||
| 419 | * @param query_cpu_range Base CPU address to loop over | ||
| 420 | * @param size Size in bytes of the CPU range to loop over | ||
| 421 | * @param func Function to call for each turned off region | ||
| 422 | */ | ||
| 423 | template <Type type, typename Func> | ||
| 424 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) { | ||
| 425 | static_assert(type != Type::Untracked); | ||
| 426 | |||
| 427 | const s64 difference = query_cpu_range - cpu_addr; | ||
| 428 | const u64 query_begin = std::max<s64>(difference, 0); | ||
| 429 | size += std::min<s64>(difference, 0); | ||
| 430 | if (query_begin >= SizeBytes() || size < 0) { | ||
| 431 | return; | ||
| 432 | } | ||
| 433 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 434 | u64* const state_words = Array<type>(); | ||
| 435 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | ||
| 436 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | ||
| 437 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | ||
| 438 | |||
| 439 | const auto modified = [](u64 word) { return word != 0; }; | ||
| 440 | const auto first_modified_word = std::find_if(words_begin, words_end, modified); | ||
| 441 | if (first_modified_word == words_end) { | ||
| 442 | // Exit early when the buffer is not modified | ||
| 443 | return; | ||
| 444 | } | ||
| 445 | const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified); | ||
| 446 | |||
| 447 | const u64 word_index_begin = std::distance(state_words, first_modified_word); | ||
| 448 | const u64 word_index_end = std::distance(state_words, last_modified_word); | ||
| 449 | |||
| 450 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); | ||
| 451 | const unsigned local_page_end = | ||
| 452 | static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||
| 453 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | ||
| 454 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | ||
| 455 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | ||
| 456 | const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE); | ||
| 457 | const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin); | ||
| 458 | const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end); | ||
| 459 | const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD; | ||
| 460 | const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1; | ||
| 461 | |||
| 462 | u64 page_begin = first_word_page_begin; | ||
| 463 | u64 current_base = 0; | ||
| 464 | u64 current_size = 0; | ||
| 465 | bool on_going = false; | ||
| 466 | for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) { | ||
| 467 | const bool is_last_word = word_index + 1 == word_index_end; | ||
| 468 | const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD; | ||
| 469 | const u64 right_offset = page_begin; | ||
| 470 | const u64 left_offset = PAGES_PER_WORD - page_end; | ||
| 471 | u64 bits = ~u64{0}; | ||
| 472 | bits = (bits >> right_offset) << right_offset; | ||
| 473 | bits = (bits << left_offset) >> left_offset; | ||
| 474 | |||
| 475 | const u64 current_word = state_words[word_index] & bits; | ||
| 476 | if (clear) { | ||
| 477 | state_words[word_index] &= ~bits; | ||
| 478 | } | ||
| 479 | |||
| 480 | if constexpr (type == Type::CPU) { | ||
| 481 | const u64 current_bits = untracked_words[word_index] & bits; | ||
| 482 | untracked_words[word_index] &= ~bits; | ||
| 483 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | ||
| 484 | } | ||
| 485 | // Exclude CPU modified pages when visiting GPU pages | ||
| 486 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||
| 487 | u64 page = page_begin; | ||
| 488 | page_begin = 0; | ||
| 489 | |||
| 490 | while (page < page_end) { | ||
| 491 | const int empty_bits = std::countr_zero(word >> page); | ||
| 492 | if (on_going && empty_bits != 0) { | ||
| 493 | InvokeModifiedRange(func, current_size, current_base); | ||
| 494 | current_size = 0; | ||
| 495 | on_going = false; | ||
| 496 | } | ||
| 497 | if (empty_bits == PAGES_PER_WORD) { | ||
| 498 | break; | ||
| 499 | } | ||
| 500 | page += empty_bits; | ||
| 501 | |||
| 502 | const int continuous_bits = std::countr_one(word >> page); | ||
| 503 | if (!on_going && continuous_bits != 0) { | ||
| 504 | current_base = word_index * PAGES_PER_WORD + page; | ||
| 505 | on_going = true; | ||
| 506 | } | ||
| 507 | current_size += continuous_bits; | ||
| 508 | page += continuous_bits; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | if (on_going && current_size > 0) { | ||
| 512 | InvokeModifiedRange(func, current_size, current_base); | ||
| 513 | } | ||
| 514 | } | ||
| 515 | |||
| 516 | template <typename Func> | ||
| 517 | void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) { | ||
| 518 | const u64 current_size_bytes = current_size * BYTES_PER_PAGE; | ||
| 519 | const u64 offset_begin = current_base * BYTES_PER_PAGE; | ||
| 520 | const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes()); | ||
| 521 | func(offset_begin, offset_end - offset_begin); | ||
| 522 | } | 108 | } |
| 523 | 109 | ||
| 524 | /** | 110 | private: |
| 525 | * Returns true when a region has been modified | ||
| 526 | * | ||
| 527 | * @param offset Offset in bytes from the start of the buffer | ||
| 528 | * @param size Size in bytes of the region to query for modifications | ||
| 529 | */ | ||
| 530 | template <Type type> | ||
| 531 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 532 | static_assert(type != Type::Untracked); | ||
| 533 | |||
| 534 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 535 | const u64* const state_words = Array<type>(); | ||
| 536 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 537 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 538 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 539 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 540 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | ||
| 541 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | ||
| 542 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 543 | const u64 word = state_words[word_index] & ~off_word; | ||
| 544 | if (word == 0) { | ||
| 545 | continue; | ||
| 546 | } | ||
| 547 | const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit); | ||
| 548 | const u64 local_page_end = page_end % PAGES_PER_WORD; | ||
| 549 | const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD; | ||
| 550 | if (((word >> page_index) << page_index) << page_end_shift != 0) { | ||
| 551 | return true; | ||
| 552 | } | ||
| 553 | } | ||
| 554 | return false; | ||
| 555 | } | ||
| 556 | |||
| 557 | /** | ||
| 558 | * Returns a begin end pair with the inclusive modified region | ||
| 559 | * | ||
| 560 | * @param offset Offset in bytes from the start of the buffer | ||
| 561 | * @param size Size in bytes of the region to query for modifications | ||
| 562 | */ | ||
| 563 | template <Type type> | ||
| 564 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 565 | static_assert(type != Type::Untracked); | ||
| 566 | |||
| 567 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 568 | const u64* const state_words = Array<type>(); | ||
| 569 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||
| 570 | const u64 word_begin = offset / BYTES_PER_WORD; | ||
| 571 | const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords()); | ||
| 572 | const u64 page_base = offset / BYTES_PER_PAGE; | ||
| 573 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||
| 574 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 575 | u64 end = 0; | ||
| 576 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | ||
| 577 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||
| 578 | const u64 word = state_words[word_index] & ~off_word; | ||
| 579 | if (word == 0) { | ||
| 580 | continue; | ||
| 581 | } | ||
| 582 | const u64 local_page_begin = std::countr_zero(word); | ||
| 583 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 584 | const u64 page_index = word_index * PAGES_PER_WORD; | ||
| 585 | const u64 page_begin = std::max(page_index + local_page_begin, page_base); | ||
| 586 | const u64 page_end = std::min(page_index + local_page_end, page_limit); | ||
| 587 | begin = std::min(begin, page_begin); | ||
| 588 | end = std::max(end, page_end); | ||
| 589 | } | ||
| 590 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 591 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 592 | } | ||
| 593 | |||
| 594 | /// Returns the number of words of the buffer | ||
| 595 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 596 | return words.NumWords(); | ||
| 597 | } | ||
| 598 | |||
| 599 | /// Returns true when the buffer fits in the small vector optimization | ||
| 600 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 601 | return words.IsShort(); | ||
| 602 | } | ||
| 603 | |||
| 604 | RasterizerInterface* rasterizer = nullptr; | ||
| 605 | VAddr cpu_addr = 0; | 111 | VAddr cpu_addr = 0; |
| 606 | Words words; | ||
| 607 | BufferFlagBits flags{}; | 112 | BufferFlagBits flags{}; |
| 608 | int stream_score = 0; | 113 | int stream_score = 0; |
| 609 | size_t lru_id = SIZE_MAX; | 114 | size_t lru_id = SIZE_MAX; |
| 115 | size_t size_bytes = 0; | ||
| 610 | }; | 116 | }; |
| 611 | 117 | ||
| 612 | } // namespace VideoCommon | 118 | } // namespace VideoCommon |
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index a16308b60..40db243d2 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include "common/microprofile.h" | 4 | #include "common/microprofile.h" |
| 5 | 5 | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index abdc593df..e534e1e9c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -1,485 +1,29 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <array> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <mutex> | ||
| 10 | #include <numeric> | 8 | #include <numeric> |
| 11 | #include <span> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <boost/container/small_vector.hpp> | ||
| 15 | #include <boost/icl/interval_set.hpp> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "common/div_ceil.h" | ||
| 19 | #include "common/literals.h" | ||
| 20 | #include "common/lru_cache.h" | ||
| 21 | #include "common/microprofile.h" | ||
| 22 | #include "common/polyfill_ranges.h" | ||
| 23 | #include "common/scratch_buffer.h" | ||
| 24 | #include "common/settings.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 27 | #include "video_core/control/channel_state_cache.h" | ||
| 28 | #include "video_core/delayed_destruction_ring.h" | ||
| 29 | #include "video_core/dirty_flags.h" | ||
| 30 | #include "video_core/engines/draw_manager.h" | ||
| 31 | #include "video_core/engines/kepler_compute.h" | ||
| 32 | #include "video_core/engines/maxwell_3d.h" | ||
| 33 | #include "video_core/memory_manager.h" | ||
| 34 | #include "video_core/rasterizer_interface.h" | ||
| 35 | #include "video_core/surface.h" | ||
| 36 | #include "video_core/texture_cache/slot_vector.h" | ||
| 37 | #include "video_core/texture_cache/types.h" | ||
| 38 | 9 | ||
| 39 | namespace VideoCommon { | 10 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 40 | |||
| 41 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 42 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 43 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 44 | |||
| 45 | using BufferId = SlotId; | ||
| 46 | |||
| 47 | using VideoCore::Surface::PixelFormat; | ||
| 48 | using namespace Common::Literals; | ||
| 49 | |||
| 50 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 51 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 52 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 53 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 54 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 55 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 56 | constexpr u32 NUM_STAGES = 5; | ||
| 57 | |||
| 58 | enum class ObtainBufferSynchronize : u32 { | ||
| 59 | NoSynchronize = 0, | ||
| 60 | FullSynchronize = 1, | ||
| 61 | SynchronizeNoDirty = 2, | ||
| 62 | }; | ||
| 63 | |||
| 64 | enum class ObtainBufferOperation : u32 { | ||
| 65 | DoNothing = 0, | ||
| 66 | MarkAsWritten = 1, | ||
| 67 | DiscardWrite = 2, | ||
| 68 | MarkQuery = 3, | ||
| 69 | }; | ||
| 70 | |||
| 71 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 72 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 73 | |||
| 74 | template <typename P> | ||
| 75 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 76 | |||
| 77 | // Page size for caching purposes. | ||
| 78 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. | ||
| 79 | static constexpr u32 YUZU_PAGEBITS = 16; | ||
| 80 | static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS; | ||
| 81 | |||
| 82 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 83 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 84 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 85 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 86 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 87 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 88 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 89 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 90 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 91 | |||
| 92 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 93 | |||
| 94 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 95 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 96 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 97 | |||
| 98 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 99 | |||
| 100 | using Runtime = typename P::Runtime; | ||
| 101 | using Buffer = typename P::Buffer; | ||
| 102 | |||
| 103 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 104 | using IntervalType = typename IntervalSet::interval_type; | ||
| 105 | |||
| 106 | struct Empty {}; | ||
| 107 | |||
| 108 | struct OverlapResult { | ||
| 109 | std::vector<BufferId> ids; | ||
| 110 | VAddr begin; | ||
| 111 | VAddr end; | ||
| 112 | bool has_stream_leap = false; | ||
| 113 | }; | ||
| 114 | |||
| 115 | struct Binding { | ||
| 116 | VAddr cpu_addr{}; | ||
| 117 | u32 size{}; | ||
| 118 | BufferId buffer_id; | ||
| 119 | }; | ||
| 120 | |||
| 121 | struct TextureBufferBinding : Binding { | ||
| 122 | PixelFormat format; | ||
| 123 | }; | ||
| 124 | |||
| 125 | static constexpr Binding NULL_BINDING{ | ||
| 126 | .cpu_addr = 0, | ||
| 127 | .size = 0, | ||
| 128 | .buffer_id = NULL_BUFFER_ID, | ||
| 129 | }; | ||
| 130 | |||
| 131 | public: | ||
| 132 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 133 | |||
| 134 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 135 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 136 | |||
| 137 | void TickFrame(); | ||
| 138 | |||
| 139 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 140 | |||
| 141 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 142 | |||
| 143 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 144 | |||
| 145 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 146 | |||
| 147 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 148 | |||
| 149 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 150 | |||
| 151 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 152 | |||
| 153 | void UpdateComputeBuffers(); | ||
| 154 | |||
| 155 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 156 | |||
| 157 | void BindHostStageBuffers(size_t stage); | ||
| 158 | |||
| 159 | void BindHostComputeBuffers(); | ||
| 160 | |||
| 161 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 162 | const UniformBufferSizes* sizes); | ||
| 163 | |||
| 164 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 165 | |||
| 166 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 167 | |||
| 168 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 169 | bool is_written); | ||
| 170 | |||
| 171 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 172 | |||
| 173 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 174 | PixelFormat format, bool is_written, bool is_image); | ||
| 175 | |||
| 176 | void UnbindComputeStorageBuffers(); | ||
| 177 | |||
| 178 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 179 | bool is_written); | ||
| 180 | |||
| 181 | void UnbindComputeTextureBuffers(); | ||
| 182 | |||
| 183 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 184 | bool is_written, bool is_image); | ||
| 185 | |||
| 186 | void FlushCachedWrites(); | ||
| 187 | |||
| 188 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 189 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 190 | |||
| 191 | void AccumulateFlushes(); | ||
| 192 | |||
| 193 | /// Return true when the caller should wait for async downloads | ||
| 194 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 195 | |||
| 196 | /// Commit asynchronous downloads | ||
| 197 | void CommitAsyncFlushes(); | ||
| 198 | void CommitAsyncFlushesHigh(); | ||
| 199 | |||
| 200 | /// Pop asynchronous downloads | ||
| 201 | void PopAsyncFlushes(); | ||
| 202 | |||
| 203 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 204 | |||
| 205 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 206 | |||
| 207 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 208 | ObtainBufferSynchronize sync_info, | ||
| 209 | ObtainBufferOperation post_op); | ||
| 210 | |||
| 211 | /// Return true when a CPU region is modified from the GPU | ||
| 212 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 213 | |||
| 214 | /// Return true when a region is registered on the cache | ||
| 215 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 216 | |||
| 217 | /// Return true when a CPU region is modified from the CPU | ||
| 218 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 219 | |||
| 220 | void SetDrawIndirect( | ||
| 221 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 222 | current_draw_indirect = current_draw_indirect_; | ||
| 223 | } | ||
| 224 | |||
| 225 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 226 | |||
| 227 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 228 | |||
| 229 | std::recursive_mutex mutex; | ||
| 230 | Runtime& runtime; | ||
| 231 | |||
| 232 | private: | ||
| 233 | template <typename Func> | ||
| 234 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 235 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 236 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 237 | index += disabled_bits; | ||
| 238 | enabled_mask >>= disabled_bits; | ||
| 239 | func(index); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | template <typename Func> | ||
| 244 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 245 | const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE); | ||
| 246 | for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 247 | const BufferId buffer_id = page_table[page]; | ||
| 248 | if (!buffer_id) { | ||
| 249 | ++page; | ||
| 250 | continue; | ||
| 251 | } | ||
| 252 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 253 | func(buffer_id, buffer); | ||
| 254 | |||
| 255 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 256 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | template <typename Func> | ||
| 261 | void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 262 | const VAddr start_address = cpu_addr; | ||
| 263 | const VAddr end_address = start_address + size; | ||
| 264 | const VAddr search_base = | ||
| 265 | static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size))); | ||
| 266 | const IntervalType search_interval{search_base, search_base + 1}; | ||
| 267 | auto it = common_ranges.lower_bound(search_interval); | ||
| 268 | if (it == common_ranges.end()) { | ||
| 269 | it = common_ranges.begin(); | ||
| 270 | } | ||
| 271 | for (; it != common_ranges.end(); it++) { | ||
| 272 | VAddr inter_addr_end = it->upper(); | ||
| 273 | VAddr inter_addr = it->lower(); | ||
| 274 | if (inter_addr >= end_address) { | ||
| 275 | break; | ||
| 276 | } | ||
| 277 | if (inter_addr_end <= start_address) { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | if (inter_addr_end > end_address) { | ||
| 281 | inter_addr_end = end_address; | ||
| 282 | } | ||
| 283 | if (inter_addr < start_address) { | ||
| 284 | inter_addr = start_address; | ||
| 285 | } | ||
| 286 | func(inter_addr, inter_addr_end); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 291 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 292 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 293 | } | ||
| 294 | |||
| 295 | void RunGarbageCollector(); | ||
| 296 | |||
| 297 | void BindHostIndexBuffer(); | ||
| 298 | |||
| 299 | void BindHostVertexBuffers(); | ||
| 300 | |||
| 301 | void BindHostDrawIndirectBuffers(); | ||
| 302 | |||
| 303 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 304 | |||
| 305 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 306 | |||
| 307 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 308 | |||
| 309 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 310 | |||
| 311 | void BindHostTransformFeedbackBuffers(); | ||
| 312 | |||
| 313 | void BindHostComputeUniformBuffers(); | ||
| 314 | |||
| 315 | void BindHostComputeStorageBuffers(); | ||
| 316 | |||
| 317 | void BindHostComputeTextureBuffers(); | ||
| 318 | |||
| 319 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 320 | |||
| 321 | void DoUpdateComputeBuffers(); | ||
| 322 | |||
| 323 | void UpdateIndexBuffer(); | ||
| 324 | |||
| 325 | void UpdateVertexBuffers(); | ||
| 326 | |||
| 327 | void UpdateVertexBuffer(u32 index); | ||
| 328 | |||
| 329 | void UpdateDrawIndirect(); | ||
| 330 | |||
| 331 | void UpdateUniformBuffers(size_t stage); | ||
| 332 | |||
| 333 | void UpdateStorageBuffers(size_t stage); | ||
| 334 | |||
| 335 | void UpdateTextureBuffers(size_t stage); | ||
| 336 | |||
| 337 | void UpdateTransformFeedbackBuffers(); | ||
| 338 | |||
| 339 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 340 | |||
| 341 | void UpdateComputeUniformBuffers(); | ||
| 342 | |||
| 343 | void UpdateComputeStorageBuffers(); | ||
| 344 | |||
| 345 | void UpdateComputeTextureBuffers(); | ||
| 346 | |||
| 347 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 348 | |||
| 349 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 350 | |||
| 351 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 352 | |||
| 353 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 354 | |||
| 355 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 356 | |||
| 357 | void Register(BufferId buffer_id); | ||
| 358 | |||
| 359 | void Unregister(BufferId buffer_id); | ||
| 360 | |||
| 361 | template <bool insert> | ||
| 362 | void ChangeRegister(BufferId buffer_id); | ||
| 363 | |||
| 364 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 365 | |||
| 366 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 367 | |||
| 368 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 369 | |||
| 370 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 371 | std::span<BufferCopy> copies); | ||
| 372 | |||
| 373 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 374 | std::span<const BufferCopy> copies); | ||
| 375 | |||
| 376 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 377 | |||
| 378 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 379 | |||
| 380 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 381 | |||
| 382 | void DeleteBuffer(BufferId buffer_id); | ||
| 383 | |||
| 384 | void NotifyBufferDeletion(); | ||
| 385 | |||
| 386 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 387 | bool is_written = false) const; | ||
| 388 | |||
| 389 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 390 | PixelFormat format); | ||
| 391 | |||
| 392 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 393 | |||
| 394 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 395 | |||
| 396 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 397 | |||
| 398 | void ClearDownload(IntervalType subtract_interval); | ||
| 399 | |||
| 400 | VideoCore::RasterizerInterface& rasterizer; | ||
| 401 | Core::Memory::Memory& cpu_memory; | ||
| 402 | |||
| 403 | SlotVector<Buffer> slot_buffers; | ||
| 404 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 405 | |||
| 406 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 407 | |||
| 408 | u32 last_index_count = 0; | ||
| 409 | |||
| 410 | Binding index_buffer; | ||
| 411 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 412 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 413 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 414 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 415 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 416 | Binding count_buffer_binding; | ||
| 417 | Binding indirect_buffer_binding; | ||
| 418 | |||
| 419 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 420 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 421 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 422 | |||
| 423 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 424 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 425 | |||
| 426 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 427 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 428 | |||
| 429 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 430 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 431 | u32 enabled_compute_storage_buffers = 0; | ||
| 432 | u32 written_compute_storage_buffers = 0; | ||
| 433 | |||
| 434 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 435 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 436 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 437 | u32 enabled_compute_texture_buffers = 0; | ||
| 438 | u32 written_compute_texture_buffers = 0; | ||
| 439 | u32 image_compute_texture_buffers = 0; | ||
| 440 | |||
| 441 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 442 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 443 | |||
| 444 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 445 | |||
| 446 | bool has_deleted_buffers = false; | ||
| 447 | 11 | ||
| 448 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | 12 | namespace VideoCommon { |
| 449 | dirty_uniform_buffers{}; | ||
| 450 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 451 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 452 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 453 | uniform_buffer_binding_sizes{}; | ||
| 454 | |||
| 455 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 456 | |||
| 457 | IntervalSet uncommitted_ranges; | ||
| 458 | IntervalSet common_ranges; | ||
| 459 | std::deque<IntervalSet> committed_ranges; | ||
| 460 | |||
| 461 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 462 | |||
| 463 | struct LRUItemParams { | ||
| 464 | using ObjectType = BufferId; | ||
| 465 | using TickType = u64; | ||
| 466 | }; | ||
| 467 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 468 | u64 frame_tick = 0; | ||
| 469 | u64 total_used_memory = 0; | ||
| 470 | u64 minimum_memory = 0; | ||
| 471 | u64 critical_memory = 0; | ||
| 472 | 13 | ||
| 473 | std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; | 14 | using Core::Memory::YUZU_PAGESIZE; |
| 474 | }; | ||
| 475 | 15 | ||
| 476 | template <class P> | 16 | template <class P> |
| 477 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | 17 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 478 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) | 18 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_) |
| 479 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { | 19 | : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{ |
| 20 | rasterizer} { | ||
| 480 | // Ensure the first slot is used for the null buffer | 21 | // Ensure the first slot is used for the null buffer |
| 481 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 22 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 482 | common_ranges.clear(); | 23 | common_ranges.clear(); |
| 24 | inline_buffer_id = NULL_BUFFER_ID; | ||
| 25 | |||
| 26 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 483 | 27 | ||
| 484 | if (!runtime.CanReportMemoryUsage()) { | 28 | if (!runtime.CanReportMemoryUsage()) { |
| 485 | minimum_memory = DEFAULT_EXPECTED_MEMORY; | 29 | minimum_memory = DEFAULT_EXPECTED_MEMORY; |
| @@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() { | |||
| 531 | uniform_cache_hits[0] = 0; | 75 | uniform_cache_hits[0] = 0; |
| 532 | uniform_cache_shots[0] = 0; | 76 | uniform_cache_shots[0] = 0; |
| 533 | 77 | ||
| 78 | active_async_buffers = !Settings::IsGPULevelHigh(); | ||
| 79 | |||
| 534 | const bool skip_preferred = hits * 256 < shots * 251; | 80 | const bool skip_preferred = hits * 256 < shots * 251; |
| 535 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | 81 | uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |
| 536 | 82 | ||
| @@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() { | |||
| 543 | } | 89 | } |
| 544 | ++frame_tick; | 90 | ++frame_tick; |
| 545 | delayed_destruction_ring.Tick(); | 91 | delayed_destruction_ring.Tick(); |
| 92 | |||
| 93 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 94 | for (auto& buffer : async_buffers_death_ring) { | ||
| 95 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 96 | } | ||
| 97 | async_buffers_death_ring.clear(); | ||
| 98 | } | ||
| 546 | } | 99 | } |
| 547 | 100 | ||
| 548 | template <class P> | 101 | template <class P> |
| 549 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | 102 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { |
| 550 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 103 | memory_tracker.MarkRegionAsCpuModified(cpu_addr, size); |
| 551 | buffer.MarkRegionAsCpuModified(cpu_addr, size); | 104 | if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { |
| 552 | }); | 105 | const IntervalType subtract_interval{cpu_addr, cpu_addr + size}; |
| 106 | ClearDownload(subtract_interval); | ||
| 107 | common_ranges.subtract(subtract_interval); | ||
| 108 | } | ||
| 553 | } | 109 | } |
| 554 | 110 | ||
| 555 | template <class P> | 111 | template <class P> |
| 556 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | 112 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { |
| 557 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 113 | memory_tracker.CachedCpuWrite(cpu_addr, size); |
| 558 | if (!buffer.HasCachedWrites()) { | 114 | const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE), |
| 559 | cached_write_buffer_ids.push_back(buffer_id); | 115 | Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)}; |
| 560 | } | 116 | cached_ranges.add(add_interval); |
| 561 | buffer.CachedCpuWrite(cpu_addr, size); | ||
| 562 | }); | ||
| 563 | } | 117 | } |
| 564 | 118 | ||
| 565 | template <class P> | 119 | template <class P> |
| 566 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | 120 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { |
| 121 | WaitOnAsyncFlushes(cpu_addr, size); | ||
| 567 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | 122 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { |
| 568 | DownloadBufferMemory(buffer, cpu_addr, size); | 123 | DownloadBufferMemory(buffer, cpu_addr, size); |
| 569 | }); | 124 | }); |
| 570 | } | 125 | } |
| 571 | 126 | ||
| 572 | template <class P> | 127 | template <class P> |
| 128 | void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) { | ||
| 129 | bool must_wait = false; | ||
| 130 | ForEachInOverlapCounter(async_downloads, cpu_addr, size, | ||
| 131 | [&](VAddr, VAddr, int) { must_wait = true; }); | ||
| 132 | bool must_release = false; | ||
| 133 | ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; }); | ||
| 134 | if (must_release) { | ||
| 135 | std::function<void()> tmp([]() {}); | ||
| 136 | rasterizer.SignalFence(std::move(tmp)); | ||
| 137 | } | ||
| 138 | if (must_wait || must_release) { | ||
| 139 | rasterizer.ReleaseFences(); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | template <class P> | ||
| 573 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | 144 | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { |
| 145 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024); | ||
| 574 | uncommitted_ranges.subtract(subtract_interval); | 146 | uncommitted_ranges.subtract(subtract_interval); |
| 147 | pending_ranges.subtract(subtract_interval); | ||
| 575 | for (auto& interval_set : committed_ranges) { | 148 | for (auto& interval_set : committed_ranges) { |
| 576 | interval_set.subtract(subtract_interval); | 149 | interval_set.subtract(subtract_interval); |
| 577 | } | 150 | } |
| @@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 591 | } | 164 | } |
| 592 | 165 | ||
| 593 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | 166 | const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; |
| 167 | WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount)); | ||
| 594 | ClearDownload(subtract_interval); | 168 | ClearDownload(subtract_interval); |
| 595 | 169 | ||
| 596 | BufferId buffer_a; | 170 | BufferId buffer_a; |
| @@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 616 | const VAddr diff = base_address - *cpu_src_address; | 190 | const VAddr diff = base_address - *cpu_src_address; |
| 617 | const VAddr new_base_address = *cpu_dest_address + diff; | 191 | const VAddr new_base_address = *cpu_dest_address + diff; |
| 618 | const IntervalType add_interval{new_base_address, new_base_address + size}; | 192 | const IntervalType add_interval{new_base_address, new_base_address + size}; |
| 619 | uncommitted_ranges.add(add_interval); | ||
| 620 | tmp_intervals.push_back(add_interval); | 193 | tmp_intervals.push_back(add_interval); |
| 194 | uncommitted_ranges.add(add_interval); | ||
| 195 | pending_ranges.add(add_interval); | ||
| 621 | }; | 196 | }; |
| 622 | ForEachWrittenRange(*cpu_src_address, amount, mirror); | 197 | ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); |
| 623 | // This subtraction in this order is important for overlapping copies. | 198 | // This subtraction in this order is important for overlapping copies. |
| 624 | common_ranges.subtract(subtract_interval); | 199 | common_ranges.subtract(subtract_interval); |
| 625 | const bool has_new_downloads = tmp_intervals.size() != 0; | 200 | const bool has_new_downloads = tmp_intervals.size() != 0; |
| @@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 628 | } | 203 | } |
| 629 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); | 204 | runtime.CopyBuffer(dest_buffer, src_buffer, copies); |
| 630 | if (has_new_downloads) { | 205 | if (has_new_downloads) { |
| 631 | dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 206 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 632 | } | 207 | } |
| 633 | std::vector<u8> tmp_buffer(amount); | 208 | std::vector<u8> tmp_buffer(amount); |
| 634 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 209 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |
| @@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add | |||
| 866 | 441 | ||
| 867 | template <class P> | 442 | template <class P> |
| 868 | void BufferCache<P>::FlushCachedWrites() { | 443 | void BufferCache<P>::FlushCachedWrites() { |
| 869 | for (const BufferId buffer_id : cached_write_buffer_ids) { | ||
| 870 | slot_buffers[buffer_id].FlushCachedWrites(); | ||
| 871 | } | ||
| 872 | cached_write_buffer_ids.clear(); | 444 | cached_write_buffer_ids.clear(); |
| 445 | memory_tracker.FlushCachedWrites(); | ||
| 446 | cached_ranges.clear(); | ||
| 873 | } | 447 | } |
| 874 | 448 | ||
| 875 | template <class P> | 449 | template <class P> |
| @@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | |||
| 879 | 453 | ||
| 880 | template <class P> | 454 | template <class P> |
| 881 | void BufferCache<P>::AccumulateFlushes() { | 455 | void BufferCache<P>::AccumulateFlushes() { |
| 882 | if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) { | ||
| 883 | uncommitted_ranges.clear(); | ||
| 884 | return; | ||
| 885 | } | ||
| 886 | if (uncommitted_ranges.empty()) { | 456 | if (uncommitted_ranges.empty()) { |
| 887 | return; | 457 | return; |
| 888 | } | 458 | } |
| @@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() { | |||
| 891 | 461 | ||
| 892 | template <class P> | 462 | template <class P> |
| 893 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | 463 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 894 | return false; | 464 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 465 | return (!async_buffers.empty() && async_buffers.front().has_value()); | ||
| 466 | } else { | ||
| 467 | return false; | ||
| 468 | } | ||
| 895 | } | 469 | } |
| 896 | 470 | ||
| 897 | template <class P> | 471 | template <class P> |
| @@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 899 | AccumulateFlushes(); | 473 | AccumulateFlushes(); |
| 900 | 474 | ||
| 901 | if (committed_ranges.empty()) { | 475 | if (committed_ranges.empty()) { |
| 476 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 477 | if (active_async_buffers) { | ||
| 478 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 479 | } | ||
| 480 | } | ||
| 902 | return; | 481 | return; |
| 903 | } | 482 | } |
| 904 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | 483 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 905 | const bool is_accuracy_normal = | ||
| 906 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||
| 907 | 484 | ||
| 485 | pending_ranges.clear(); | ||
| 908 | auto it = committed_ranges.begin(); | 486 | auto it = committed_ranges.begin(); |
| 909 | while (it != committed_ranges.end()) { | 487 | while (it != committed_ranges.end()) { |
| 910 | auto& current_intervals = *it; | 488 | auto& current_intervals = *it; |
| @@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 926 | const std::size_t size = interval.upper() - interval.lower(); | 504 | const std::size_t size = interval.upper() - interval.lower(); |
| 927 | const VAddr cpu_addr = interval.lower(); | 505 | const VAddr cpu_addr = interval.lower(); |
| 928 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | 506 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { |
| 929 | buffer.ForEachDownloadRangeAndClear( | 507 | const VAddr buffer_start = buffer.CpuAddr(); |
| 930 | cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 508 | const VAddr buffer_end = buffer_start + buffer.SizeBytes(); |
| 931 | if (is_accuracy_normal) { | 509 | const VAddr new_start = std::max(buffer_start, cpu_addr); |
| 932 | return; | 510 | const VAddr new_end = std::min(buffer_end, cpu_addr + size); |
| 933 | } | 511 | memory_tracker.ForEachDownloadRange( |
| 512 | new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 934 | const VAddr buffer_addr = buffer.CpuAddr(); | 513 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 935 | const auto add_download = [&](VAddr start, VAddr end) { | 514 | const auto add_download = [&](VAddr start, VAddr end) { |
| 936 | const u64 new_offset = start - buffer_addr; | 515 | const u64 new_offset = start - buffer_addr; |
| @@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 944 | buffer_id, | 523 | buffer_id, |
| 945 | }); | 524 | }); |
| 946 | // Align up to avoid cache conflicts | 525 | // Align up to avoid cache conflicts |
| 947 | constexpr u64 align = 8ULL; | 526 | constexpr u64 align = 64ULL; |
| 948 | constexpr u64 mask = ~(align - 1ULL); | 527 | constexpr u64 mask = ~(align - 1ULL); |
| 949 | total_size_bytes += (new_size + align - 1) & mask; | 528 | total_size_bytes += (new_size + align - 1) & mask; |
| 950 | largest_copy = std::max(largest_copy, new_size); | 529 | largest_copy = std::max(largest_copy, new_size); |
| 951 | }; | 530 | }; |
| 952 | 531 | ||
| 953 | const VAddr start_address = buffer_addr + range_offset; | 532 | ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download); |
| 954 | const VAddr end_address = start_address + range_size; | ||
| 955 | ForEachWrittenRange(start_address, range_size, add_download); | ||
| 956 | const IntervalType subtract_interval{start_address, end_address}; | ||
| 957 | common_ranges.subtract(subtract_interval); | ||
| 958 | }); | 533 | }); |
| 959 | }); | 534 | }); |
| 960 | } | 535 | } |
| 961 | } | 536 | } |
| 962 | committed_ranges.clear(); | 537 | committed_ranges.clear(); |
| 963 | if (downloads.empty()) { | 538 | if (downloads.empty()) { |
| 539 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 540 | if (active_async_buffers) { | ||
| 541 | async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||
| 542 | } | ||
| 543 | } | ||
| 964 | return; | 544 | return; |
| 965 | } | 545 | } |
| 966 | if constexpr (USE_MEMORY_MAPS) { | 546 | if (active_async_buffers) { |
| 967 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | 547 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 968 | runtime.PreCopyBarrier(); | 548 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 969 | for (auto& [copy, buffer_id] : downloads) { | 549 | boost::container::small_vector<BufferCopy, 4> normalized_copies; |
| 970 | // Have in mind the staging buffer offset for the copy | 550 | IntervalSet new_async_range{}; |
| 971 | copy.dst_offset += download_staging.offset; | 551 | runtime.PreCopyBarrier(); |
| 972 | const std::array copies{copy}; | 552 | for (auto& [copy, buffer_id] : downloads) { |
| 973 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | 553 | copy.dst_offset += download_staging.offset; |
| 974 | } | 554 | const std::array copies{copy}; |
| 975 | runtime.PostCopyBarrier(); | 555 | BufferCopy second_copy{copy}; |
| 976 | runtime.Finish(); | 556 | Buffer& buffer = slot_buffers[buffer_id]; |
| 977 | for (const auto& [copy, buffer_id] : downloads) { | 557 | second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset; |
| 978 | const Buffer& buffer = slot_buffers[buffer_id]; | 558 | VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset); |
| 979 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 559 | const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; |
| 980 | // Undo the modified offset | 560 | async_downloads += std::make_pair(base_interval, 1); |
| 981 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | 561 | runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); |
| 982 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | 562 | normalized_copies.push_back(second_copy); |
| 983 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | 563 | } |
| 564 | runtime.PostCopyBarrier(); | ||
| 565 | pending_downloads.emplace_back(std::move(normalized_copies)); | ||
| 566 | async_buffers.emplace_back(download_staging); | ||
| 567 | } else { | ||
| 568 | committed_ranges.clear(); | ||
| 569 | uncommitted_ranges.clear(); | ||
| 984 | } | 570 | } |
| 985 | } else { | 571 | } else { |
| 986 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | 572 | if constexpr (USE_MEMORY_MAPS) { |
| 987 | for (const auto& [copy, buffer_id] : downloads) { | 573 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 988 | Buffer& buffer = slot_buffers[buffer_id]; | 574 | runtime.PreCopyBarrier(); |
| 989 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | 575 | for (auto& [copy, buffer_id] : downloads) { |
| 990 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | 576 | // Have in mind the staging buffer offset for the copy |
| 991 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 577 | copy.dst_offset += download_staging.offset; |
| 578 | const std::array copies{copy}; | ||
| 579 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); | ||
| 580 | } | ||
| 581 | runtime.PostCopyBarrier(); | ||
| 582 | runtime.Finish(); | ||
| 583 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 584 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 585 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 586 | // Undo the modified offset | ||
| 587 | const u64 dst_offset = copy.dst_offset - download_staging.offset; | ||
| 588 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 589 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 590 | } | ||
| 591 | } else { | ||
| 592 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 593 | for (const auto& [copy, buffer_id] : downloads) { | ||
| 594 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 595 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 596 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 597 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 598 | } | ||
| 992 | } | 599 | } |
| 993 | } | 600 | } |
| 994 | } | 601 | } |
| 995 | 602 | ||
| 996 | template <class P> | 603 | template <class P> |
| 997 | void BufferCache<P>::CommitAsyncFlushes() { | 604 | void BufferCache<P>::CommitAsyncFlushes() { |
| 998 | if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { | 605 | CommitAsyncFlushesHigh(); |
| 999 | CommitAsyncFlushesHigh(); | ||
| 1000 | } else { | ||
| 1001 | uncommitted_ranges.clear(); | ||
| 1002 | committed_ranges.clear(); | ||
| 1003 | } | ||
| 1004 | } | 606 | } |
| 1005 | 607 | ||
| 1006 | template <class P> | 608 | template <class P> |
| 1007 | void BufferCache<P>::PopAsyncFlushes() {} | 609 | void BufferCache<P>::PopAsyncFlushes() { |
| 610 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 611 | PopAsyncBuffers(); | ||
| 612 | } | ||
| 1008 | 613 | ||
| 1009 | template <class P> | 614 | template <class P> |
| 1010 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | 615 | void BufferCache<P>::PopAsyncBuffers() { |
| 1011 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 616 | if (async_buffers.empty()) { |
| 1012 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 617 | return; |
| 1013 | const BufferId image_id = page_table[page]; | 618 | } |
| 1014 | if (!image_id) { | 619 | if (!async_buffers.front().has_value()) { |
| 1015 | ++page; | 620 | async_buffers.pop_front(); |
| 1016 | continue; | 621 | return; |
| 1017 | } | 622 | } |
| 1018 | Buffer& buffer = slot_buffers[image_id]; | 623 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 1019 | if (buffer.IsRegionGpuModified(addr, size)) { | 624 | auto& downloads = pending_downloads.front(); |
| 1020 | return true; | 625 | auto& async_buffer = async_buffers.front(); |
| 626 | u8* base = async_buffer->mapped_span.data(); | ||
| 627 | const size_t base_offset = async_buffer->offset; | ||
| 628 | for (const auto& copy : downloads) { | ||
| 629 | const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset); | ||
| 630 | const u64 dst_offset = copy.dst_offset - base_offset; | ||
| 631 | const u8* read_mapped_memory = base + dst_offset; | ||
| 632 | ForEachInOverlapCounter( | ||
| 633 | async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) { | ||
| 634 | cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr], | ||
| 635 | end - start); | ||
| 636 | if (count == 1) { | ||
| 637 | const IntervalType base_interval{start, end}; | ||
| 638 | common_ranges.subtract(base_interval); | ||
| 639 | } | ||
| 640 | }); | ||
| 641 | const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | ||
| 642 | RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | ||
| 1021 | } | 643 | } |
| 1022 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | 644 | async_buffers_death_ring.emplace_back(*async_buffer); |
| 1023 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 645 | async_buffers.pop_front(); |
| 646 | pending_downloads.pop_front(); | ||
| 1024 | } | 647 | } |
| 1025 | return false; | 648 | } |
| 649 | |||
| 650 | template <class P> | ||
| 651 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 652 | bool is_dirty = false; | ||
| 653 | ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; }); | ||
| 654 | return is_dirty; | ||
| 1026 | } | 655 | } |
| 1027 | 656 | ||
| 1028 | template <class P> | 657 | template <class P> |
| 1029 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | 658 | bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { |
| 1030 | const VAddr end_addr = addr + size; | 659 | const VAddr end_addr = addr + size; |
| 1031 | const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 660 | const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1032 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | 661 | for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) { |
| 1033 | const BufferId buffer_id = page_table[page]; | 662 | const BufferId buffer_id = page_table[page]; |
| 1034 | if (!buffer_id) { | 663 | if (!buffer_id) { |
| 1035 | ++page; | 664 | ++page; |
| @@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { | |||
| 1041 | if (buf_start_addr < end_addr && addr < buf_end_addr) { | 670 | if (buf_start_addr < end_addr && addr < buf_end_addr) { |
| 1042 | return true; | 671 | return true; |
| 1043 | } | 672 | } |
| 1044 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | 673 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); |
| 1045 | } | 674 | } |
| 1046 | return false; | 675 | return false; |
| 1047 | } | 676 | } |
| 1048 | 677 | ||
| 1049 | template <class P> | 678 | template <class P> |
| 1050 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { | 679 | bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { |
| 1051 | const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); | 680 | return memory_tracker.IsRegionCpuModified(addr, size); |
| 1052 | for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { | ||
| 1053 | const BufferId image_id = page_table[page]; | ||
| 1054 | if (!image_id) { | ||
| 1055 | ++page; | ||
| 1056 | continue; | ||
| 1057 | } | ||
| 1058 | Buffer& buffer = slot_buffers[image_id]; | ||
| 1059 | if (buffer.IsRegionCpuModified(addr, size)) { | ||
| 1060 | return true; | ||
| 1061 | } | ||
| 1062 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 1063 | page = Common::DivCeil(end_addr, YUZU_PAGESIZE); | ||
| 1064 | } | ||
| 1065 | return false; | ||
| 1066 | } | 681 | } |
| 1067 | 682 | ||
| 1068 | template <class P> | 683 | template <class P> |
| @@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() { | |||
| 1072 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | 687 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); |
| 1073 | const u32 size = index_buffer.size; | 688 | const u32 size = index_buffer.size; |
| 1074 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 689 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1075 | if (!draw_state.inline_index_draw_indexes.empty()) { | 690 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1076 | if constexpr (USE_MEMORY_MAPS) { | 691 | if constexpr (USE_MEMORY_MAPS) { |
| 1077 | auto upload_staging = runtime.UploadStagingBuffer(size); | 692 | auto upload_staging = runtime.UploadStagingBuffer(size); |
| 1078 | std::array<BufferCopy, 1> copies{ | 693 | std::array<BufferCopy, 1> copies{ |
| @@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | |||
| 1155 | TouchBuffer(buffer, binding.buffer_id); | 770 | TouchBuffer(buffer, binding.buffer_id); |
| 1156 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && | 771 | const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && |
| 1157 | size <= uniform_buffer_skip_cache_size && | 772 | size <= uniform_buffer_skip_cache_size && |
| 1158 | !buffer.IsRegionGpuModified(cpu_addr, size); | 773 | !memory_tracker.IsRegionGpuModified(cpu_addr, size); |
| 1159 | if (use_fast_buffer) { | 774 | if (use_fast_buffer) { |
| 1160 | if constexpr (IS_OPENGL) { | 775 | if constexpr (IS_OPENGL) { |
| 1161 | if (runtime.HasFastBufferSubData()) { | 776 | if (runtime.HasFastBufferSubData()) { |
| @@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() { | |||
| 1378 | // We have to check for the dirty flags and index count | 993 | // We have to check for the dirty flags and index count |
| 1379 | // The index count is currently changed without updating the dirty flags | 994 | // The index count is currently changed without updating the dirty flags |
| 1380 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | 995 | const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); |
| 1381 | const auto& index_array = draw_state.index_buffer; | 996 | const auto& index_buffer_ref = draw_state.index_buffer; |
| 1382 | auto& flags = maxwell3d->dirty.flags; | 997 | auto& flags = maxwell3d->dirty.flags; |
| 1383 | if (!flags[Dirty::IndexBuffer]) { | 998 | if (!flags[Dirty::IndexBuffer]) { |
| 1384 | return; | 999 | return; |
| 1385 | } | 1000 | } |
| 1386 | flags[Dirty::IndexBuffer] = false; | 1001 | flags[Dirty::IndexBuffer] = false; |
| 1387 | last_index_count = index_array.count; | 1002 | if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { |
| 1388 | if (!draw_state.inline_index_draw_indexes.empty()) { | ||
| 1389 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); | 1003 | auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); |
| 1004 | u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE); | ||
| 1005 | if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] { | ||
| 1006 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1007 | } | ||
| 1008 | if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] { | ||
| 1009 | slot_buffers.erase(inline_buffer_id); | ||
| 1010 | inline_buffer_id = CreateBuffer(0, buffer_size); | ||
| 1011 | } | ||
| 1390 | index_buffer = Binding{ | 1012 | index_buffer = Binding{ |
| 1391 | .cpu_addr = 0, | 1013 | .cpu_addr = 0, |
| 1392 | .size = inline_index_size, | 1014 | .size = inline_index_size, |
| 1393 | .buffer_id = CreateBuffer(0, inline_index_size), | 1015 | .buffer_id = inline_buffer_id, |
| 1394 | }; | 1016 | }; |
| 1395 | return; | 1017 | return; |
| 1396 | } | 1018 | } |
| 1397 | const GPUVAddr gpu_addr_begin = index_array.StartAddress(); | 1019 | |
| 1398 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | 1020 | const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress(); |
| 1021 | const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress(); | ||
| 1399 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1022 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1400 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | 1023 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1401 | const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); | 1024 | const u32 draw_size = |
| 1025 | (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes(); | ||
| 1402 | const u32 size = std::min(address_size, draw_size); | 1026 | const u32 size = std::min(address_size, draw_size); |
| 1403 | if (size == 0 || !cpu_addr) { | 1027 | if (size == 0 || !cpu_addr) { |
| 1404 | index_buffer = NULL_BINDING; | 1028 | index_buffer = NULL_BINDING; |
| @@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { | |||
| 1434 | const GPUVAddr gpu_addr_begin = array.Address(); | 1058 | const GPUVAddr gpu_addr_begin = array.Address(); |
| 1435 | const GPUVAddr gpu_addr_end = limit.Address() + 1; | 1059 | const GPUVAddr gpu_addr_end = limit.Address() + 1; |
| 1436 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); | 1060 | const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); |
| 1437 | u32 address_size = static_cast<u32>( | 1061 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |
| 1438 | std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); | 1062 | u32 size = address_size; // TODO: Analyze stride and number of vertices |
| 1439 | if (array.enable == 0 || address_size == 0 || !cpu_addr) { | 1063 | if (array.enable == 0 || size == 0 || !cpu_addr) { |
| 1440 | vertex_buffers[index] = NULL_BINDING; | 1064 | vertex_buffers[index] = NULL_BINDING; |
| 1441 | return; | 1065 | return; |
| 1442 | } | 1066 | } |
| 1443 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { | 1067 | if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { |
| 1444 | address_size = | 1068 | size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); |
| 1445 | static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size)); | ||
| 1446 | } | 1069 | } |
| 1447 | const u32 size = address_size; // TODO: Analyze stride and number of vertices | ||
| 1448 | vertex_buffers[index] = Binding{ | 1070 | vertex_buffers[index] = Binding{ |
| 1449 | .cpu_addr = *cpu_addr, | 1071 | .cpu_addr = *cpu_addr, |
| 1450 | .size = size, | 1072 | .size = size, |
| @@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() { | |||
| 1591 | 1213 | ||
| 1592 | template <class P> | 1214 | template <class P> |
| 1593 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | 1215 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { |
| 1594 | Buffer& buffer = slot_buffers[buffer_id]; | 1216 | memory_tracker.MarkRegionAsGpuModified(cpu_addr, size); |
| 1595 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | 1217 | |
| 1218 | if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) { | ||
| 1219 | SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size); | ||
| 1220 | } | ||
| 1596 | 1221 | ||
| 1597 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; | 1222 | const IntervalType base_interval{cpu_addr, cpu_addr + size}; |
| 1598 | common_ranges.add(base_interval); | 1223 | common_ranges.add(base_interval); |
| 1599 | |||
| 1600 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 1601 | if (!is_async) { | ||
| 1602 | return; | ||
| 1603 | } | ||
| 1604 | uncommitted_ranges.add(base_interval); | 1224 | uncommitted_ranges.add(base_interval); |
| 1225 | pending_ranges.add(base_interval); | ||
| 1605 | } | 1226 | } |
| 1606 | 1227 | ||
| 1607 | template <class P> | 1228 | template <class P> |
| @@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | |||
| 1609 | if (cpu_addr == 0) { | 1230 | if (cpu_addr == 0) { |
| 1610 | return NULL_BUFFER_ID; | 1231 | return NULL_BUFFER_ID; |
| 1611 | } | 1232 | } |
| 1612 | const u64 page = cpu_addr >> YUZU_PAGEBITS; | 1233 | const u64 page = cpu_addr >> CACHING_PAGEBITS; |
| 1613 | const BufferId buffer_id = page_table[page]; | 1234 | const BufferId buffer_id = page_table[page]; |
| 1614 | if (!buffer_id) { | 1235 | if (!buffer_id) { |
| 1615 | return CreateBuffer(cpu_addr, size); | 1236 | return CreateBuffer(cpu_addr, size); |
| @@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1638 | .has_stream_leap = has_stream_leap, | 1259 | .has_stream_leap = has_stream_leap, |
| 1639 | }; | 1260 | }; |
| 1640 | } | 1261 | } |
| 1641 | for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); | 1262 | for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE); |
| 1642 | cpu_addr += YUZU_PAGESIZE) { | 1263 | cpu_addr += CACHING_PAGESIZE) { |
| 1643 | const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; | 1264 | const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS]; |
| 1644 | if (!overlap_id) { | 1265 | if (!overlap_id) { |
| 1645 | continue; | 1266 | continue; |
| 1646 | } | 1267 | } |
| @@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu | |||
| 1666 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | 1287 | // as a stream buffer. Increase the size to skip constantly recreating buffers. |
| 1667 | has_stream_leap = true; | 1288 | has_stream_leap = true; |
| 1668 | if (expands_right) { | 1289 | if (expands_right) { |
| 1669 | begin -= YUZU_PAGESIZE * 256; | 1290 | begin -= CACHING_PAGESIZE * 256; |
| 1670 | cpu_addr = begin; | 1291 | cpu_addr = begin; |
| 1671 | } | 1292 | } |
| 1672 | if (expands_left) { | 1293 | if (expands_left) { |
| 1673 | end += YUZU_PAGESIZE * 256; | 1294 | end += CACHING_PAGESIZE * 256; |
| 1674 | } | 1295 | } |
| 1675 | } | 1296 | } |
| 1676 | } | 1297 | } |
| @@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, | |||
| 1690 | if (accumulate_stream_score) { | 1311 | if (accumulate_stream_score) { |
| 1691 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); | 1312 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); |
| 1692 | } | 1313 | } |
| 1693 | std::vector<BufferCopy> copies; | 1314 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1694 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); | 1315 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); |
| 1695 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { | 1316 | copies.push_back(BufferCopy{ |
| 1696 | copies.push_back(BufferCopy{ | 1317 | .src_offset = 0, |
| 1697 | .src_offset = begin, | 1318 | .dst_offset = dst_base_offset, |
| 1698 | .dst_offset = dst_base_offset + begin, | 1319 | .size = overlap.SizeBytes(), |
| 1699 | .size = range_size, | ||
| 1700 | }); | ||
| 1701 | new_buffer.UnmarkRegionAsCpuModified(begin, range_size); | ||
| 1702 | new_buffer.MarkRegionAsGpuModified(begin, range_size); | ||
| 1703 | }); | 1320 | }); |
| 1704 | if (!copies.empty()) { | 1321 | runtime.CopyBuffer(new_buffer, overlap, copies); |
| 1705 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | 1322 | DeleteBuffer(overlap_id, true); |
| 1706 | } | ||
| 1707 | DeleteBuffer(overlap_id); | ||
| 1708 | } | 1323 | } |
| 1709 | 1324 | ||
| 1710 | template <class P> | 1325 | template <class P> |
| 1711 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | 1326 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { |
| 1327 | VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE); | ||
| 1328 | cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE); | ||
| 1329 | wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr); | ||
| 1712 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | 1330 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); |
| 1713 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | 1331 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); |
| 1714 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | 1332 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); |
| @@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | |||
| 1718 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | 1336 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); |
| 1719 | } | 1337 | } |
| 1720 | Register(new_buffer_id); | 1338 | Register(new_buffer_id); |
| 1721 | TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); | 1339 | TouchBuffer(new_buffer, new_buffer_id); |
| 1722 | return new_buffer_id; | 1340 | return new_buffer_id; |
| 1723 | } | 1341 | } |
| 1724 | 1342 | ||
| @@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | |||
| 1746 | } | 1364 | } |
| 1747 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | 1365 | const VAddr cpu_addr_begin = buffer.CpuAddr(); |
| 1748 | const VAddr cpu_addr_end = cpu_addr_begin + size; | 1366 | const VAddr cpu_addr_end = cpu_addr_begin + size; |
| 1749 | const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; | 1367 | const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE; |
| 1750 | const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); | 1368 | const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE); |
| 1751 | for (u64 page = page_begin; page != page_end; ++page) { | 1369 | for (u64 page = page_begin; page != page_end; ++page) { |
| 1752 | if constexpr (insert) { | 1370 | if constexpr (insert) { |
| 1753 | page_table[page] = buffer_id; | 1371 | page_table[page] = buffer_id; |
| @@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { | |||
| 1766 | 1384 | ||
| 1767 | template <class P> | 1385 | template <class P> |
| 1768 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { | 1386 | bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 1769 | if (buffer.CpuAddr() == 0) { | ||
| 1770 | return true; | ||
| 1771 | } | ||
| 1772 | return SynchronizeBufferImpl(buffer, cpu_addr, size); | 1387 | return SynchronizeBufferImpl(buffer, cpu_addr, size); |
| 1773 | } | 1388 | } |
| 1774 | 1389 | ||
| @@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1777 | boost::container::small_vector<BufferCopy, 4> copies; | 1392 | boost::container::small_vector<BufferCopy, 4> copies; |
| 1778 | u64 total_size_bytes = 0; | 1393 | u64 total_size_bytes = 0; |
| 1779 | u64 largest_copy = 0; | 1394 | u64 largest_copy = 0; |
| 1780 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1395 | VAddr buffer_start = buffer.CpuAddr(); |
| 1396 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1781 | copies.push_back(BufferCopy{ | 1397 | copies.push_back(BufferCopy{ |
| 1782 | .src_offset = total_size_bytes, | 1398 | .src_offset = total_size_bytes, |
| 1783 | .dst_offset = range_offset, | 1399 | .dst_offset = cpu_addr_out - buffer_start, |
| 1784 | .size = range_size, | 1400 | .size = range_size, |
| 1785 | }); | 1401 | }); |
| 1786 | total_size_bytes += range_size; | 1402 | total_size_bytes += range_size; |
| @@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s | |||
| 1795 | } | 1411 | } |
| 1796 | 1412 | ||
| 1797 | template <class P> | 1413 | template <class P> |
| 1414 | bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1415 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1416 | u64 total_size_bytes = 0; | ||
| 1417 | u64 largest_copy = 0; | ||
| 1418 | IntervalSet found_sets{}; | ||
| 1419 | auto make_copies = [&] { | ||
| 1420 | for (auto& interval : found_sets) { | ||
| 1421 | const std::size_t sub_size = interval.upper() - interval.lower(); | ||
| 1422 | const VAddr cpu_addr_ = interval.lower(); | ||
| 1423 | copies.push_back(BufferCopy{ | ||
| 1424 | .src_offset = total_size_bytes, | ||
| 1425 | .dst_offset = cpu_addr_ - buffer.CpuAddr(), | ||
| 1426 | .size = sub_size, | ||
| 1427 | }); | ||
| 1428 | total_size_bytes += sub_size; | ||
| 1429 | largest_copy = std::max<u64>(largest_copy, sub_size); | ||
| 1430 | } | ||
| 1431 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1432 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1433 | }; | ||
| 1434 | memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { | ||
| 1435 | const VAddr base_adr = cpu_addr_out; | ||
| 1436 | const VAddr end_adr = base_adr + range_size; | ||
| 1437 | const IntervalType add_interval{base_adr, end_adr}; | ||
| 1438 | found_sets.add(add_interval); | ||
| 1439 | }); | ||
| 1440 | if (found_sets.empty()) { | ||
| 1441 | return true; | ||
| 1442 | } | ||
| 1443 | const IntervalType search_interval{cpu_addr, cpu_addr + size}; | ||
| 1444 | auto it = common_ranges.lower_bound(search_interval); | ||
| 1445 | auto it_end = common_ranges.upper_bound(search_interval); | ||
| 1446 | if (it == common_ranges.end()) { | ||
| 1447 | make_copies(); | ||
| 1448 | return false; | ||
| 1449 | } | ||
| 1450 | while (it != it_end) { | ||
| 1451 | found_sets.subtract(*it); | ||
| 1452 | it++; | ||
| 1453 | } | ||
| 1454 | make_copies(); | ||
| 1455 | return false; | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | template <class P> | ||
| 1798 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | 1459 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, |
| 1799 | std::span<BufferCopy> copies) { | 1460 | std::span<BufferCopy> copies) { |
| 1800 | if constexpr (USE_MEMORY_MAPS) { | 1461 | if constexpr (USE_MEMORY_MAPS) { |
| @@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg | |||
| 1805 | } | 1466 | } |
| 1806 | 1467 | ||
| 1807 | template <class P> | 1468 | template <class P> |
| 1808 | void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | 1469 | void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1809 | std::span<const BufferCopy> copies) { | 1470 | [[maybe_unused]] u64 largest_copy, |
| 1810 | std::span<u8> immediate_buffer; | 1471 | [[maybe_unused]] std::span<const BufferCopy> copies) { |
| 1811 | for (const BufferCopy& copy : copies) { | 1472 | if constexpr (!USE_MEMORY_MAPS) { |
| 1812 | std::span<const u8> upload_span; | 1473 | std::span<u8> immediate_buffer; |
| 1813 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1474 | for (const BufferCopy& copy : copies) { |
| 1814 | if (IsRangeGranular(cpu_addr, copy.size)) { | 1475 | std::span<const u8> upload_span; |
| 1815 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | 1476 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; |
| 1816 | } else { | 1477 | if (IsRangeGranular(cpu_addr, copy.size)) { |
| 1817 | if (immediate_buffer.empty()) { | 1478 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); |
| 1818 | immediate_buffer = ImmediateBuffer(largest_copy); | 1479 | } else { |
| 1480 | if (immediate_buffer.empty()) { | ||
| 1481 | immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 1482 | } | ||
| 1483 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1484 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1819 | } | 1485 | } |
| 1820 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | 1486 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| 1821 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 1822 | } | 1487 | } |
| 1823 | buffer.ImmediateUpload(copy.dst_offset, upload_span); | ||
| 1824 | } | 1488 | } |
| 1825 | } | 1489 | } |
| 1826 | 1490 | ||
| 1827 | template <class P> | 1491 | template <class P> |
| 1828 | void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, | 1492 | void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer, |
| 1829 | std::span<BufferCopy> copies) { | 1493 | [[maybe_unused]] u64 total_size_bytes, |
| 1830 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); | 1494 | [[maybe_unused]] std::span<BufferCopy> copies) { |
| 1831 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | 1495 | if constexpr (USE_MEMORY_MAPS) { |
| 1832 | for (BufferCopy& copy : copies) { | 1496 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); |
| 1833 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | 1497 | const std::span<u8> staging_pointer = upload_staging.mapped_span; |
| 1834 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | 1498 | for (BufferCopy& copy : copies) { |
| 1835 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | 1499 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; |
| 1500 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1501 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | ||
| 1836 | 1502 | ||
| 1837 | // Apply the staging offset | 1503 | // Apply the staging offset |
| 1838 | copy.src_offset += upload_staging.offset; | 1504 | copy.src_offset += upload_staging.offset; |
| 1505 | } | ||
| 1506 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1839 | } | 1507 | } |
| 1840 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | ||
| 1841 | } | 1508 | } |
| 1842 | 1509 | ||
| 1843 | template <class P> | 1510 | template <class P> |
| @@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||
| 1847 | if (!is_dirty) { | 1514 | if (!is_dirty) { |
| 1848 | return false; | 1515 | return false; |
| 1849 | } | 1516 | } |
| 1850 | if (!IsRegionGpuModified(dest_address, copy_size)) { | 1517 | VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE); |
| 1518 | VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE); | ||
| 1519 | if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) { | ||
| 1851 | return false; | 1520 | return false; |
| 1852 | } | 1521 | } |
| 1853 | 1522 | ||
| @@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1886 | boost::container::small_vector<BufferCopy, 1> copies; | 1555 | boost::container::small_vector<BufferCopy, 1> copies; |
| 1887 | u64 total_size_bytes = 0; | 1556 | u64 total_size_bytes = 0; |
| 1888 | u64 largest_copy = 0; | 1557 | u64 largest_copy = 0; |
| 1889 | buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | 1558 | memory_tracker.ForEachDownloadRangeAndClear( |
| 1890 | const VAddr buffer_addr = buffer.CpuAddr(); | 1559 | cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) { |
| 1891 | const auto add_download = [&](VAddr start, VAddr end) { | 1560 | const VAddr buffer_addr = buffer.CpuAddr(); |
| 1892 | const u64 new_offset = start - buffer_addr; | 1561 | const auto add_download = [&](VAddr start, VAddr end) { |
| 1893 | const u64 new_size = end - start; | 1562 | const u64 new_offset = start - buffer_addr; |
| 1894 | copies.push_back(BufferCopy{ | 1563 | const u64 new_size = end - start; |
| 1895 | .src_offset = new_offset, | 1564 | copies.push_back(BufferCopy{ |
| 1896 | .dst_offset = total_size_bytes, | 1565 | .src_offset = new_offset, |
| 1897 | .size = new_size, | 1566 | .dst_offset = total_size_bytes, |
| 1898 | }); | 1567 | .size = new_size, |
| 1899 | // Align up to avoid cache conflicts | 1568 | }); |
| 1900 | constexpr u64 align = 256ULL; | 1569 | // Align up to avoid cache conflicts |
| 1901 | constexpr u64 mask = ~(align - 1ULL); | 1570 | constexpr u64 align = 64ULL; |
| 1902 | total_size_bytes += (new_size + align - 1) & mask; | 1571 | constexpr u64 mask = ~(align - 1ULL); |
| 1903 | largest_copy = std::max(largest_copy, new_size); | 1572 | total_size_bytes += (new_size + align - 1) & mask; |
| 1904 | }; | 1573 | largest_copy = std::max(largest_copy, new_size); |
| 1905 | 1574 | }; | |
| 1906 | const VAddr start_address = buffer_addr + range_offset; | 1575 | |
| 1907 | const VAddr end_address = start_address + range_size; | 1576 | const VAddr start_address = cpu_addr_out; |
| 1908 | ForEachWrittenRange(start_address, range_size, add_download); | 1577 | const VAddr end_address = start_address + range_size; |
| 1909 | const IntervalType subtract_interval{start_address, end_address}; | 1578 | ForEachInRangeSet(common_ranges, start_address, range_size, add_download); |
| 1910 | ClearDownload(subtract_interval); | 1579 | const IntervalType subtract_interval{start_address, end_address}; |
| 1911 | common_ranges.subtract(subtract_interval); | 1580 | ClearDownload(subtract_interval); |
| 1912 | }); | 1581 | common_ranges.subtract(subtract_interval); |
| 1582 | }); | ||
| 1913 | if (total_size_bytes == 0) { | 1583 | if (total_size_bytes == 0) { |
| 1914 | return; | 1584 | return; |
| 1915 | } | 1585 | } |
| @@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||
| 1943 | } | 1613 | } |
| 1944 | 1614 | ||
| 1945 | template <class P> | 1615 | template <class P> |
| 1946 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | 1616 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { |
| 1947 | const auto scalar_replace = [buffer_id](Binding& binding) { | 1617 | const auto scalar_replace = [buffer_id](Binding& binding) { |
| 1948 | if (binding.buffer_id == buffer_id) { | 1618 | if (binding.buffer_id == buffer_id) { |
| 1949 | binding.buffer_id = BufferId{}; | 1619 | binding.buffer_id = BufferId{}; |
| @@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | |||
| 1962 | std::erase(cached_write_buffer_ids, buffer_id); | 1632 | std::erase(cached_write_buffer_ids, buffer_id); |
| 1963 | 1633 | ||
| 1964 | // Mark the whole buffer as CPU written to stop tracking CPU writes | 1634 | // Mark the whole buffer as CPU written to stop tracking CPU writes |
| 1965 | Buffer& buffer = slot_buffers[buffer_id]; | 1635 | if (!do_not_mark) { |
| 1966 | buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | 1636 | Buffer& buffer = slot_buffers[buffer_id]; |
| 1637 | memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1638 | } | ||
| 1967 | 1639 | ||
| 1968 | Unregister(buffer_id); | 1640 | Unregister(buffer_id); |
| 1969 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | 1641 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); |
| @@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s | |||
| 2011 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); | 1683 | LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index); |
| 2012 | return NULL_BINDING; | 1684 | return NULL_BINDING; |
| 2013 | } | 1685 | } |
| 2014 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | 1686 | const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE); |
| 2015 | const Binding binding{ | 1687 | const Binding binding{ |
| 2016 | .cpu_addr = *cpu_addr, | 1688 | .cpu_addr = *cpu_addr, |
| 2017 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), | 1689 | .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr), |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h new file mode 100644 index 000000000..656baa550 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -0,0 +1,580 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 11 | #include <numeric> | ||
| 12 | #include <span> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/container/small_vector.hpp> | ||
| 17 | #define BOOST_NO_MT | ||
| 18 | #include <boost/pool/detail/mutex.hpp> | ||
| 19 | #undef BOOST_NO_MT | ||
| 20 | #include <boost/icl/interval.hpp> | ||
| 21 | #include <boost/icl/interval_base_set.hpp> | ||
| 22 | #include <boost/icl/interval_set.hpp> | ||
| 23 | #include <boost/icl/split_interval_map.hpp> | ||
| 24 | #include <boost/pool/pool.hpp> | ||
| 25 | #include <boost/pool/pool_alloc.hpp> | ||
| 26 | #include <boost/pool/poolfwd.hpp> | ||
| 27 | |||
| 28 | #include "common/common_types.h" | ||
| 29 | #include "common/div_ceil.h" | ||
| 30 | #include "common/literals.h" | ||
| 31 | #include "common/lru_cache.h" | ||
| 32 | #include "common/microprofile.h" | ||
| 33 | #include "common/scope_exit.h" | ||
| 34 | #include "common/settings.h" | ||
| 35 | #include "core/memory.h" | ||
| 36 | #include "video_core/buffer_cache/buffer_base.h" | ||
| 37 | #include "video_core/control/channel_state_cache.h" | ||
| 38 | #include "video_core/delayed_destruction_ring.h" | ||
| 39 | #include "video_core/dirty_flags.h" | ||
| 40 | #include "video_core/engines/draw_manager.h" | ||
| 41 | #include "video_core/engines/kepler_compute.h" | ||
| 42 | #include "video_core/engines/maxwell_3d.h" | ||
| 43 | #include "video_core/memory_manager.h" | ||
| 44 | #include "video_core/rasterizer_interface.h" | ||
| 45 | #include "video_core/surface.h" | ||
| 46 | #include "video_core/texture_cache/slot_vector.h" | ||
| 47 | #include "video_core/texture_cache/types.h" | ||
| 48 | |||
| 49 | namespace boost { | ||
| 50 | template <typename T> | ||
| 51 | class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>; | ||
| 52 | } | ||
| 53 | |||
| 54 | namespace VideoCommon { | ||
| 55 | |||
| 56 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); | ||
| 57 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 58 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 59 | |||
| 60 | using BufferId = SlotId; | ||
| 61 | |||
| 62 | using VideoCore::Surface::PixelFormat; | ||
| 63 | using namespace Common::Literals; | ||
| 64 | |||
| 65 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 66 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 67 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 68 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 69 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 70 | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||
| 71 | constexpr u32 NUM_STAGES = 5; | ||
| 72 | |||
| 73 | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||
| 74 | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||
| 75 | |||
| 76 | enum class ObtainBufferSynchronize : u32 { | ||
| 77 | NoSynchronize = 0, | ||
| 78 | FullSynchronize = 1, | ||
| 79 | SynchronizeNoDirty = 2, | ||
| 80 | }; | ||
| 81 | |||
| 82 | enum class ObtainBufferOperation : u32 { | ||
| 83 | DoNothing = 0, | ||
| 84 | MarkAsWritten = 1, | ||
| 85 | DiscardWrite = 2, | ||
| 86 | MarkQuery = 3, | ||
| 87 | }; | ||
| 88 | |||
| 89 | template <typename P> | ||
| 90 | class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> { | ||
| 91 | // Page size for caching purposes. | ||
| 92 | // This is unrelated to the CPU page size and it can be changed as it seems optimal. | ||
| 93 | static constexpr u32 CACHING_PAGEBITS = 16; | ||
| 94 | static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; | ||
| 95 | |||
| 96 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 97 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 98 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 99 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 100 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 101 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 102 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 103 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 104 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 105 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; | ||
| 106 | |||
| 107 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 108 | |||
| 109 | static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB; | ||
| 110 | static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB; | ||
| 111 | static constexpr s64 TARGET_THRESHOLD = 4_GiB; | ||
| 112 | |||
| 113 | // Debug Flags. | ||
| 114 | |||
| 115 | static constexpr bool DISABLE_DOWNLOADS = true; | ||
| 116 | |||
| 117 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 118 | |||
| 119 | using Runtime = typename P::Runtime; | ||
| 120 | using Buffer = typename P::Buffer; | ||
| 121 | using Async_Buffer = typename P::Async_Buffer; | ||
| 122 | using MemoryTracker = typename P::MemoryTracker; | ||
| 123 | |||
| 124 | using IntervalCompare = std::less<VAddr>; | ||
| 125 | using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>; | ||
| 126 | using IntervalAllocator = boost::fast_pool_allocator<VAddr>; | ||
| 127 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 128 | using IntervalType = typename IntervalSet::interval_type; | ||
| 129 | |||
| 130 | template <typename Type> | ||
| 131 | struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> { | ||
| 132 | // types | ||
| 133 | typedef counter_add_functor<Type> type; | ||
| 134 | typedef boost::icl::identity_based_inplace_combine<Type> base_type; | ||
| 135 | |||
| 136 | // public member functions | ||
| 137 | void operator()(Type& current, const Type& added) const { | ||
| 138 | current += added; | ||
| 139 | if (current < base_type::identity_element()) { | ||
| 140 | current = base_type::identity_element(); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 | // public static functions | ||
| 145 | static void version(Type&){}; | ||
| 146 | }; | ||
| 147 | |||
| 148 | using OverlapCombine = counter_add_functor<int>; | ||
| 149 | using OverlapSection = boost::icl::inter_section<int>; | ||
| 150 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | ||
| 151 | |||
| 152 | struct Empty {}; | ||
| 153 | |||
| 154 | struct OverlapResult { | ||
| 155 | std::vector<BufferId> ids; | ||
| 156 | VAddr begin; | ||
| 157 | VAddr end; | ||
| 158 | bool has_stream_leap = false; | ||
| 159 | }; | ||
| 160 | |||
| 161 | struct Binding { | ||
| 162 | VAddr cpu_addr{}; | ||
| 163 | u32 size{}; | ||
| 164 | BufferId buffer_id; | ||
| 165 | }; | ||
| 166 | |||
| 167 | struct TextureBufferBinding : Binding { | ||
| 168 | PixelFormat format; | ||
| 169 | }; | ||
| 170 | |||
| 171 | static constexpr Binding NULL_BINDING{ | ||
| 172 | .cpu_addr = 0, | ||
| 173 | .size = 0, | ||
| 174 | .buffer_id = NULL_BUFFER_ID, | ||
| 175 | }; | ||
| 176 | |||
| 177 | public: | ||
| 178 | static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB); | ||
| 179 | |||
| 180 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 181 | Core::Memory::Memory& cpu_memory_, Runtime& runtime_); | ||
| 182 | |||
| 183 | void TickFrame(); | ||
| 184 | |||
| 185 | void WriteMemory(VAddr cpu_addr, u64 size); | ||
| 186 | |||
| 187 | void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||
| 188 | |||
| 189 | void DownloadMemory(VAddr cpu_addr, u64 size); | ||
| 190 | |||
| 191 | bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); | ||
| 192 | |||
| 193 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); | ||
| 194 | |||
| 195 | void DisableGraphicsUniformBuffer(size_t stage, u32 index); | ||
| 196 | |||
| 197 | void UpdateGraphicsBuffers(bool is_indexed); | ||
| 198 | |||
| 199 | void UpdateComputeBuffers(); | ||
| 200 | |||
| 201 | void BindHostGeometryBuffers(bool is_indexed); | ||
| 202 | |||
| 203 | void BindHostStageBuffers(size_t stage); | ||
| 204 | |||
| 205 | void BindHostComputeBuffers(); | ||
| 206 | |||
| 207 | void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, | ||
| 208 | const UniformBufferSizes* sizes); | ||
| 209 | |||
| 210 | void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); | ||
| 211 | |||
| 212 | void UnbindGraphicsStorageBuffers(size_t stage); | ||
| 213 | |||
| 214 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 215 | bool is_written); | ||
| 216 | |||
| 217 | void UnbindGraphicsTextureBuffers(size_t stage); | ||
| 218 | |||
| 219 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||
| 220 | PixelFormat format, bool is_written, bool is_image); | ||
| 221 | |||
| 222 | void UnbindComputeStorageBuffers(); | ||
| 223 | |||
| 224 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 225 | bool is_written); | ||
| 226 | |||
| 227 | void UnbindComputeTextureBuffers(); | ||
| 228 | |||
| 229 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||
| 230 | bool is_written, bool is_image); | ||
| 231 | |||
| 232 | [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||
| 233 | ObtainBufferSynchronize sync_info, | ||
| 234 | ObtainBufferOperation post_op); | ||
| 235 | void FlushCachedWrites(); | ||
| 236 | |||
| 237 | /// Return true when there are uncommitted buffers to be downloaded | ||
| 238 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 239 | |||
| 240 | void AccumulateFlushes(); | ||
| 241 | |||
| 242 | /// Return true when the caller should wait for async downloads | ||
| 243 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 244 | |||
| 245 | /// Commit asynchronous downloads | ||
| 246 | void CommitAsyncFlushes(); | ||
| 247 | void CommitAsyncFlushesHigh(); | ||
| 248 | |||
| 249 | /// Pop asynchronous downloads | ||
| 250 | void PopAsyncFlushes(); | ||
| 251 | void PopAsyncBuffers(); | ||
| 252 | |||
| 253 | bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||
| 254 | |||
| 255 | bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||
| 256 | |||
| 257 | /// Return true when a CPU region is modified from the GPU | ||
| 258 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 259 | |||
| 260 | /// Return true when a region is registered on the cache | ||
| 261 | [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size); | ||
| 262 | |||
| 263 | /// Return true when a CPU region is modified from the CPU | ||
| 264 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||
| 265 | |||
| 266 | void SetDrawIndirect( | ||
| 267 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) { | ||
| 268 | current_draw_indirect = current_draw_indirect_; | ||
| 269 | } | ||
| 270 | |||
| 271 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount(); | ||
| 272 | |||
| 273 | [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer(); | ||
| 274 | |||
| 275 | std::recursive_mutex mutex; | ||
| 276 | Runtime& runtime; | ||
| 277 | |||
| 278 | private: | ||
| 279 | template <typename Func> | ||
| 280 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 281 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 282 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 283 | index += disabled_bits; | ||
| 284 | enabled_mask >>= disabled_bits; | ||
| 285 | func(index); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | template <typename Func> | ||
| 290 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { | ||
| 291 | const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE); | ||
| 292 | for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) { | ||
| 293 | const BufferId buffer_id = page_table[page]; | ||
| 294 | if (!buffer_id) { | ||
| 295 | ++page; | ||
| 296 | continue; | ||
| 297 | } | ||
| 298 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 299 | func(buffer_id, buffer); | ||
| 300 | |||
| 301 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 302 | page = Common::DivCeil(end_addr, CACHING_PAGESIZE); | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | template <typename Func> | ||
| 307 | void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) { | ||
| 308 | const VAddr start_address = cpu_addr; | ||
| 309 | const VAddr end_address = start_address + size; | ||
| 310 | const IntervalType search_interval{start_address, end_address}; | ||
| 311 | auto it = current_range.lower_bound(search_interval); | ||
| 312 | if (it == current_range.end()) { | ||
| 313 | return; | ||
| 314 | } | ||
| 315 | auto end_it = current_range.upper_bound(search_interval); | ||
| 316 | for (; it != end_it; it++) { | ||
| 317 | VAddr inter_addr_end = it->upper(); | ||
| 318 | VAddr inter_addr = it->lower(); | ||
| 319 | if (inter_addr_end > end_address) { | ||
| 320 | inter_addr_end = end_address; | ||
| 321 | } | ||
| 322 | if (inter_addr < start_address) { | ||
| 323 | inter_addr = start_address; | ||
| 324 | } | ||
| 325 | func(inter_addr, inter_addr_end); | ||
| 326 | } | ||
| 327 | } | ||
| 328 | |||
| 329 | template <typename Func> | ||
| 330 | void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size, | ||
| 331 | Func&& func) { | ||
| 332 | const VAddr start_address = cpu_addr; | ||
| 333 | const VAddr end_address = start_address + size; | ||
| 334 | const IntervalType search_interval{start_address, end_address}; | ||
| 335 | auto it = current_range.lower_bound(search_interval); | ||
| 336 | if (it == current_range.end()) { | ||
| 337 | return; | ||
| 338 | } | ||
| 339 | auto end_it = current_range.upper_bound(search_interval); | ||
| 340 | for (; it != end_it; it++) { | ||
| 341 | auto& inter = it->first; | ||
| 342 | VAddr inter_addr_end = inter.upper(); | ||
| 343 | VAddr inter_addr = inter.lower(); | ||
| 344 | if (inter_addr_end > end_address) { | ||
| 345 | inter_addr_end = end_address; | ||
| 346 | } | ||
| 347 | if (inter_addr < start_address) { | ||
| 348 | inter_addr = start_address; | ||
| 349 | } | ||
| 350 | func(inter_addr, inter_addr_end, it->second); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | void RemoveEachInOverlapCounter(OverlapCounter& current_range, | ||
| 355 | const IntervalType search_interval, int subtract_value) { | ||
| 356 | bool any_removals = false; | ||
| 357 | current_range.add(std::make_pair(search_interval, subtract_value)); | ||
| 358 | do { | ||
| 359 | any_removals = false; | ||
| 360 | auto it = current_range.lower_bound(search_interval); | ||
| 361 | if (it == current_range.end()) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | auto end_it = current_range.upper_bound(search_interval); | ||
| 365 | for (; it != end_it; it++) { | ||
| 366 | if (it->second <= 0) { | ||
| 367 | any_removals = true; | ||
| 368 | current_range.erase(it); | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | } | ||
| 372 | } while (any_removals); | ||
| 373 | } | ||
| 374 | |||
| 375 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||
| 376 | return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||
| 377 | ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||
| 378 | } | ||
| 379 | |||
| 380 | void RunGarbageCollector(); | ||
| 381 | |||
| 382 | void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size); | ||
| 383 | |||
| 384 | void BindHostIndexBuffer(); | ||
| 385 | |||
| 386 | void BindHostVertexBuffers(); | ||
| 387 | |||
| 388 | void BindHostDrawIndirectBuffers(); | ||
| 389 | |||
| 390 | void BindHostGraphicsUniformBuffers(size_t stage); | ||
| 391 | |||
| 392 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); | ||
| 393 | |||
| 394 | void BindHostGraphicsStorageBuffers(size_t stage); | ||
| 395 | |||
| 396 | void BindHostGraphicsTextureBuffers(size_t stage); | ||
| 397 | |||
| 398 | void BindHostTransformFeedbackBuffers(); | ||
| 399 | |||
| 400 | void BindHostComputeUniformBuffers(); | ||
| 401 | |||
| 402 | void BindHostComputeStorageBuffers(); | ||
| 403 | |||
| 404 | void BindHostComputeTextureBuffers(); | ||
| 405 | |||
| 406 | void DoUpdateGraphicsBuffers(bool is_indexed); | ||
| 407 | |||
| 408 | void DoUpdateComputeBuffers(); | ||
| 409 | |||
| 410 | void UpdateIndexBuffer(); | ||
| 411 | |||
| 412 | void UpdateVertexBuffers(); | ||
| 413 | |||
| 414 | void UpdateVertexBuffer(u32 index); | ||
| 415 | |||
| 416 | void UpdateDrawIndirect(); | ||
| 417 | |||
| 418 | void UpdateUniformBuffers(size_t stage); | ||
| 419 | |||
| 420 | void UpdateStorageBuffers(size_t stage); | ||
| 421 | |||
| 422 | void UpdateTextureBuffers(size_t stage); | ||
| 423 | |||
| 424 | void UpdateTransformFeedbackBuffers(); | ||
| 425 | |||
| 426 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 427 | |||
| 428 | void UpdateComputeUniformBuffers(); | ||
| 429 | |||
| 430 | void UpdateComputeStorageBuffers(); | ||
| 431 | |||
| 432 | void UpdateComputeTextureBuffers(); | ||
| 433 | |||
| 434 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 435 | |||
| 436 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 437 | |||
| 438 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 439 | |||
| 440 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 441 | |||
| 442 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 443 | |||
| 444 | void Register(BufferId buffer_id); | ||
| 445 | |||
| 446 | void Unregister(BufferId buffer_id); | ||
| 447 | |||
| 448 | template <bool insert> | ||
| 449 | void ChangeRegister(BufferId buffer_id); | ||
| 450 | |||
| 451 | void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; | ||
| 452 | |||
| 453 | bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 454 | |||
| 455 | bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 456 | |||
| 457 | bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 458 | |||
| 459 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 460 | std::span<BufferCopy> copies); | ||
| 461 | |||
| 462 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 463 | std::span<const BufferCopy> copies); | ||
| 464 | |||
| 465 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 466 | |||
| 467 | void DownloadBufferMemory(Buffer& buffer_id); | ||
| 468 | |||
| 469 | void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); | ||
| 470 | |||
| 471 | void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false); | ||
| 472 | |||
| 473 | void NotifyBufferDeletion(); | ||
| 474 | |||
| 475 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | ||
| 476 | bool is_written) const; | ||
| 477 | |||
| 478 | [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, | ||
| 479 | PixelFormat format); | ||
| 480 | |||
| 481 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 482 | |||
| 483 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 484 | |||
| 485 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 486 | |||
| 487 | void ClearDownload(IntervalType subtract_interval); | ||
| 488 | |||
| 489 | VideoCore::RasterizerInterface& rasterizer; | ||
| 490 | Core::Memory::Memory& cpu_memory; | ||
| 491 | |||
| 492 | SlotVector<Buffer> slot_buffers; | ||
| 493 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 494 | |||
| 495 | const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; | ||
| 496 | |||
| 497 | u32 last_index_count = 0; | ||
| 498 | |||
| 499 | Binding index_buffer; | ||
| 500 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 501 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 502 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 503 | std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; | ||
| 504 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 505 | Binding count_buffer_binding; | ||
| 506 | Binding indirect_buffer_binding; | ||
| 507 | |||
| 508 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 509 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 510 | std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; | ||
| 511 | |||
| 512 | std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; | ||
| 513 | u32 enabled_compute_uniform_buffer_mask = 0; | ||
| 514 | |||
| 515 | const UniformBufferSizes* uniform_buffer_sizes{}; | ||
| 516 | const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; | ||
| 517 | |||
| 518 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 519 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 520 | u32 enabled_compute_storage_buffers = 0; | ||
| 521 | u32 written_compute_storage_buffers = 0; | ||
| 522 | |||
| 523 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||
| 524 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||
| 525 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 526 | u32 enabled_compute_texture_buffers = 0; | ||
| 527 | u32 written_compute_texture_buffers = 0; | ||
| 528 | u32 image_compute_texture_buffers = 0; | ||
| 529 | |||
| 530 | std::array<u32, 16> uniform_cache_hits{}; | ||
| 531 | std::array<u32, 16> uniform_cache_shots{}; | ||
| 532 | |||
| 533 | u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE; | ||
| 534 | |||
| 535 | bool has_deleted_buffers = false; | ||
| 536 | |||
| 537 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 538 | dirty_uniform_buffers{}; | ||
| 539 | std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; | ||
| 540 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, | ||
| 541 | std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> | ||
| 542 | uniform_buffer_binding_sizes{}; | ||
| 543 | |||
| 544 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 545 | |||
| 546 | MemoryTracker memory_tracker; | ||
| 547 | IntervalSet uncommitted_ranges; | ||
| 548 | IntervalSet common_ranges; | ||
| 549 | IntervalSet cached_ranges; | ||
| 550 | IntervalSet pending_ranges; | ||
| 551 | std::deque<IntervalSet> committed_ranges; | ||
| 552 | |||
| 553 | // Async Buffers | ||
| 554 | OverlapCounter async_downloads; | ||
| 555 | std::deque<std::optional<Async_Buffer>> async_buffers; | ||
| 556 | std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads; | ||
| 557 | std::optional<Async_Buffer> current_buffer; | ||
| 558 | |||
| 559 | std::deque<Async_Buffer> async_buffers_death_ring; | ||
| 560 | |||
| 561 | size_t immediate_buffer_capacity = 0; | ||
| 562 | Common::ScratchBuffer<u8> immediate_buffer_alloc; | ||
| 563 | |||
| 564 | struct LRUItemParams { | ||
| 565 | using ObjectType = BufferId; | ||
| 566 | using TickType = u64; | ||
| 567 | }; | ||
| 568 | Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; | ||
| 569 | u64 frame_tick = 0; | ||
| 570 | u64 total_used_memory = 0; | ||
| 571 | u64 minimum_memory = 0; | ||
| 572 | u64 critical_memory = 0; | ||
| 573 | BufferId inline_buffer_id; | ||
| 574 | |||
| 575 | bool active_async_buffers = false; | ||
| 576 | |||
| 577 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | ||
| 578 | }; | ||
| 579 | |||
| 580 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h new file mode 100644 index 000000000..dc4ebfcaa --- /dev/null +++ b/src/video_core/buffer_cache/memory_tracker_base.h | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <deque> | ||
| 9 | #include <limits> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | |||
| 14 | #include "common/alignment.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/buffer_cache/word_manager.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
/// Two-tier tracker of CPU/GPU modified guest memory. The 39-bit CPU address
/// space is split into 4 MiB "high pages"; each high page that is ever touched
/// lazily receives a WordManager that keeps its per-page dirty bitmaps.
/// WordManagers are pool-allocated and recycled through a free list.
template <class RasterizerInterface>
class MemoryTrackerBase {
    static constexpr size_t MAX_CPU_PAGE_BITS = 39; ///< Bits of tracked CPU address space
    static constexpr size_t HIGHER_PAGE_BITS = 22;  ///< One high page covers 4 MiB
    static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
    static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
    static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
    static constexpr size_t MANAGER_POOL_SIZE = 32; ///< Managers allocated per pool growth
    static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
    using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>;

public:
    MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {}
    ~MemoryTrackerBase() = default;

    /// Returns the inclusive CPU modified range in a begin end pair
    [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
                                                        u64 query_size) noexcept {
        return IteratePairs<true>(
            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
                return manager->template ModifiedRegion<Type::CPU>(offset, size);
            });
    }

    /// Returns the inclusive GPU modified range in a begin end pair
    [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
                                                        u64 query_size) noexcept {
        // <false>: a pure query must not allocate managers for untouched pages
        return IteratePairs<false>(
            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
                return manager->template ModifiedRegion<Type::GPU>(offset, size);
            });
    }

    /// Returns true if a region has been modified from the CPU
    [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
        return IteratePages<true>(
            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
                return manager->template IsRegionModified<Type::CPU>(offset, size);
            });
    }

    /// Returns true if a region has been modified from the GPU
    [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
        return IteratePages<false>(
            query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
                return manager->template IsRegionModified<Type::GPU>(offset, size);
            });
    }

    /// Mark region as CPU modified, notifying the rasterizer about this change
    void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
        IteratePages<true>(dirty_cpu_addr, query_size,
                           [](Manager* manager, u64 offset, size_t size) {
                               manager->template ChangeRegionState<Type::CPU, true>(
                                   manager->GetCpuAddr() + offset, size);
                           });
    }

    /// Unmark region as CPU modified, notifying the rasterizer about this change
    void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
        IteratePages<true>(dirty_cpu_addr, query_size,
                           [](Manager* manager, u64 offset, size_t size) {
                               manager->template ChangeRegionState<Type::CPU, false>(
                                   manager->GetCpuAddr() + offset, size);
                           });
    }

    /// Mark region as modified from the host GPU
    void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
        IteratePages<true>(dirty_cpu_addr, query_size,
                           [](Manager* manager, u64 offset, size_t size) {
                               manager->template ChangeRegionState<Type::GPU, true>(
                                   manager->GetCpuAddr() + offset, size);
                           });
    }

    /// Unmark region as modified from the host GPU
    void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
        IteratePages<true>(dirty_cpu_addr, query_size,
                           [](Manager* manager, u64 offset, size_t size) {
                               manager->template ChangeRegionState<Type::GPU, false>(
                                   manager->GetCpuAddr() + offset, size);
                           });
    }

    /// Mark region as modified from the CPU
    /// but don't mark it as modified until FlushCachedWrites is called.
    void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) {
        IteratePages<true>(
            dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) {
                const VAddr cpu_address = manager->GetCpuAddr() + offset;
                manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size);
                // Remember the high page so the parameterless FlushCachedWrites()
                // only has to visit managers with pending cached writes
                cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS));
            });
    }

    /// Flushes cached CPU writes, and notify the rasterizer about the deltas
    void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
        IteratePages<false>(query_cpu_addr, query_size,
                            [](Manager* manager, [[maybe_unused]] u64 offset,
                               [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); });
    }

    /// Flushes every pending cached CPU write recorded by CachedCpuWrite
    void FlushCachedWrites() noexcept {
        for (auto id : cached_pages) {
            top_tier[id]->FlushCachedWrites();
        }
        cached_pages.clear();
    }

    /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
    template <typename Func>
    void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
        IteratePages<true>(query_cpu_range, query_size,
                           [&func](Manager* manager, u64 offset, size_t size) {
                               manager->template ForEachModifiedRange<Type::CPU, true>(
                                   manager->GetCpuAddr() + offset, size, func);
                           });
    }

    /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
    /// when 'clear' is true; leave the GPU dirty bits untouched otherwise
    template <typename Func>
    void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) {
        IteratePages<false>(query_cpu_range, query_size,
                            [&func, clear](Manager* manager, u64 offset, size_t size) {
                                if (clear) {
                                    manager->template ForEachModifiedRange<Type::GPU, true>(
                                        manager->GetCpuAddr() + offset, size, func);
                                } else {
                                    manager->template ForEachModifiedRange<Type::GPU, false>(
                                        manager->GetCpuAddr() + offset, size, func);
                                }
                            });
    }

    /// Call 'func' for each GPU modified range and always unmark those pages
    template <typename Func>
    void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) {
        IteratePages<false>(query_cpu_range, query_size,
                            [&func](Manager* manager, u64 offset, size_t size) {
                                manager->template ForEachModifiedRange<Type::GPU, true>(
                                    manager->GetCpuAddr() + offset, size, func);
                            });
    }

private:
    /// Invokes func(manager, offset_in_high_page, size) for every high page that
    /// overlaps [cpu_address, cpu_address + size). When 'func' returns bool, a true
    /// result stops the iteration early and is propagated as this function's result.
    /// @tparam create_region_on_fail When true, missing managers are created on
    ///         demand; otherwise unallocated high pages are skipped.
    template <bool create_region_on_fail, typename Func>
    bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
        using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        std::size_t remaining_size{size};
        std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
        u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
        while (remaining_size > 0) {
            // Only the first iteration may start mid-page; later pages begin at 0
            const std::size_t copy_amount{
                std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
            auto* manager{top_tier[page_index]};
            if (manager) {
                if constexpr (BOOL_BREAK) {
                    if (func(manager, page_offset, copy_amount)) {
                        return true;
                    }
                } else {
                    func(manager, page_offset, copy_amount);
                }
            } else if constexpr (create_region_on_fail) {
                CreateRegion(page_index);
                manager = top_tier[page_index];
                if constexpr (BOOL_BREAK) {
                    if (func(manager, page_offset, copy_amount)) {
                        return true;
                    }
                } else {
                    func(manager, page_offset, copy_amount);
                }
            }
            page_index++;
            page_offset = 0;
            remaining_size -= copy_amount;
        }
        return false;
    }

    /// Accumulates the union of the (begin, end) ranges reported by 'func' over every
    /// overlapping high page, translated to absolute addresses. Returns (0, 0) when
    /// no page reported a range.
    template <bool create_region_on_fail, typename Func>
    std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) {
        std::size_t remaining_size{size};
        std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
        u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
        u64 begin = std::numeric_limits<u64>::max();
        u64 end = 0;
        while (remaining_size > 0) {
            const std::size_t copy_amount{
                std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
            auto* manager{top_tier[page_index]};
            const auto execute = [&] {
                auto [new_begin, new_end] = func(manager, page_offset, copy_amount);
                // (0, 0) is the "nothing found" sentinel from the word manager
                if (new_begin != 0 || new_end != 0) {
                    const u64 base_address = page_index << HIGHER_PAGE_BITS;
                    begin = std::min(new_begin + base_address, begin);
                    end = std::max(new_end + base_address, end);
                }
            };
            if (manager) {
                execute();
            } else if constexpr (create_region_on_fail) {
                CreateRegion(page_index);
                manager = top_tier[page_index];
                execute();
            }
            page_index++;
            page_offset = 0;
            remaining_size -= copy_amount;
        }
        if (begin < end) {
            return std::make_pair(begin, end);
        } else {
            return std::make_pair(0ULL, 0ULL);
        }
    }

    /// Attaches a (possibly recycled) WordManager to the given high page
    void CreateRegion(std::size_t page_index) {
        const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
        top_tier[page_index] = GetNewManager(base_cpu_addr);
    }

    /// Pops a manager from the free list, growing the pool by MANAGER_POOL_SIZE
    /// entries first if the list is empty
    Manager* GetNewManager(VAddr base_cpu_addess) {
        const auto on_return = [&] {
            auto* new_manager = free_managers.front();
            new_manager->SetCpuAddress(base_cpu_addess);
            free_managers.pop_front();
            return new_manager;
        };
        if (!free_managers.empty()) {
            return on_return();
        }
        manager_pool.emplace_back();
        auto& last_pool = manager_pool.back();
        for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
            // Placement-new re-initializes the default-constructed pool slots with
            // a real word size (default-constructed Managers own no resources)
            new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE);
            free_managers.push_back(&last_pool[i]);
        }
        return on_return();
    }

    std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool; ///< Backing storage; deque keeps addresses stable
    std::deque<Manager*> free_managers; ///< Recycled managers awaiting reuse

    std::array<Manager*, NUM_HIGH_PAGES> top_tier{}; ///< High page index -> manager (null when untouched)

    std::unordered_set<u32> cached_pages; ///< High pages with pending cached CPU writes

    RasterizerInterface* rasterizer = nullptr;
};
| 272 | |||
| 273 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h new file mode 100644 index 000000000..a42455045 --- /dev/null +++ b/src/video_core/buffer_cache/word_manager.h | |||
| @@ -0,0 +1,462 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <limits> | ||
| 9 | #include <span> | ||
| 10 | #include <utility> | ||
| 11 | |||
| 12 | #include "common/alignment.h" | ||
| 13 | #include "common/common_funcs.h" | ||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "common/div_ceil.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
constexpr u64 PAGES_PER_WORD = 64; ///< One bit per page in each u64 bitmap word
constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; ///< Bytes covered by one bitmap word

/// Kinds of per-page state bitmaps kept by the word manager
enum class Type {
    CPU,       ///< Pages modified from the CPU
    GPU,       ///< Pages modified from the host GPU
    CachedCPU, ///< CPU writes deferred until FlushCachedWrites
    Untracked, ///< Maintained alongside the CPU/CachedCPU bits; presumably pages
               ///< not currently write-protected — verify against NotifyRasterizer
};
| 30 | |||
| 31 | /// Vector tracking modified pages tightly packed with small vector optimization | ||
| 32 | template <size_t stack_words = 1> | ||
| 33 | struct WordsArray { | ||
| 34 | /// Returns the pointer to the words state | ||
| 35 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | ||
| 36 | return is_short ? stack.data() : heap; | ||
| 37 | } | ||
| 38 | |||
| 39 | /// Returns the pointer to the words state | ||
| 40 | [[nodiscard]] u64* Pointer(bool is_short) noexcept { | ||
| 41 | return is_short ? stack.data() : heap; | ||
| 42 | } | ||
| 43 | |||
| 44 | std::array<u64, stack_words> stack{}; ///< Small buffers storage | ||
| 45 | u64* heap; ///< Not-small buffers pointer to the storage | ||
| 46 | }; | ||
| 47 | |||
/// The four dirty-page bitmaps (CPU, GPU, cached-CPU, untracked) of one buffer.
/// Large buffers share a single heap allocation, with cpu.heap as its base.
template <size_t stack_words = 1>
struct Words {
    explicit Words() = default;
    /// Builds the bitmaps for a buffer of `size_bytes_` bytes. CPU and untracked
    /// bits start fully set; GPU and cached-CPU bits start clear.
    /// NOTE(review): assumes size_bytes_ > 0 — a zero size would index
    /// NumWords() - 1 out of bounds below; confirm callers never pass 0.
    explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
        num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
        if (IsShort()) {
            cpu.stack.fill(~u64{0});
            gpu.stack.fill(0);
            cached_cpu.stack.fill(0);
            untracked.stack.fill(~u64{0});
        } else {
            // Share allocation between CPU and GPU pages and set their default values
            u64* const alloc = new u64[num_words * 4];
            cpu.heap = alloc;
            gpu.heap = alloc + num_words;
            cached_cpu.heap = alloc + num_words * 2;
            untracked.heap = alloc + num_words * 3;
            std::fill_n(cpu.heap, num_words, ~u64{0});
            std::fill_n(gpu.heap, num_words, 0);
            std::fill_n(cached_cpu.heap, num_words, 0);
            std::fill_n(untracked.heap, num_words, ~u64{0});
        }
        // Clean up tailing bits: pages of the last word that lie past the end of
        // the buffer must never read as set
        const u64 last_word_size = size_bytes % BYTES_PER_WORD;
        const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
        const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
        const u64 last_word = (~u64{0} << shift) >> shift;
        cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
        untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
    }

    ~Words() {
        Release();
    }

    /// Releases the current allocation, then steals rhs's storage.
    /// NOTE(review): not safe against self-move-assignment (Release() runs first).
    Words& operator=(Words&& rhs) noexcept {
        Release();
        size_bytes = rhs.size_bytes;
        num_words = rhs.num_words;
        cpu = rhs.cpu;
        gpu = rhs.gpu;
        cached_cpu = rhs.cached_cpu;
        untracked = rhs.untracked;
        rhs.cpu.heap = nullptr; // cpu.heap owns the shared allocation
        return *this;
    }

    Words(Words&& rhs) noexcept
        : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
          cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
        rhs.cpu.heap = nullptr; // cpu.heap owns the shared allocation
    }

    Words& operator=(const Words&) = delete;
    Words(const Words&) = delete;

    /// Returns true when the buffer fits in the small vector optimization
    [[nodiscard]] bool IsShort() const noexcept {
        return num_words <= stack_words;
    }

    /// Returns the number of words of the buffer
    [[nodiscard]] size_t NumWords() const noexcept {
        return num_words;
    }

    /// Release buffer resources
    void Release() {
        if (!IsShort()) {
            // CPU written words is the base for the heap allocation
            delete[] cpu.heap;
        }
    }

    /// Returns a mutable view over the bitmap selected by `type`
    template <Type type>
    std::span<u64> Span() noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::CachedCPU) {
            return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    /// Returns a read-only view over the bitmap selected by `type`
    template <Type type>
    std::span<const u64> Span() const noexcept {
        if constexpr (type == Type::CPU) {
            return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::GPU) {
            return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::CachedCPU) {
            return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words);
        } else if constexpr (type == Type::Untracked) {
            return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
        }
    }

    u64 size_bytes = 0;    ///< Tracked buffer size in bytes
    size_t num_words = 0;  ///< Number of bitmap words per state
    WordsArray<stack_words> cpu;        ///< Also the owner of the shared heap allocation
    WordsArray<stack_words> gpu;
    WordsArray<stack_words> cached_cpu;
    WordsArray<stack_words> untracked;
};
| 155 | |||
| 156 | template <class RasterizerInterface, size_t stack_words = 1> | ||
| 157 | class WordManager { | ||
| 158 | public: | ||
    /// Constructs a manager tracking `size_bytes` bytes starting at `cpu_addr_`
    explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes)
        : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {}

    /// Creates an empty placeholder; re-target it later with SetCpuAddress()
    explicit WordManager() = default;
| 163 | |||
    /// Rebases the manager to track memory starting at `new_cpu_addr`
    void SetCpuAddress(VAddr new_cpu_addr) {
        cpu_addr = new_cpu_addr;
    }

    /// Returns the base CPU address of the tracked region
    VAddr GetCpuAddr() const {
        return cpu_addr;
    }
| 171 | |||
| 172 | static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) { | ||
| 173 | constexpr size_t number_bits = sizeof(u64) * 8; | ||
| 174 | const size_t limit_page_end = number_bits - std::min(page_end, number_bits); | ||
| 175 | u64 bits = (word >> page_start) << page_start; | ||
| 176 | bits = (bits << limit_page_end) >> limit_page_end; | ||
| 177 | return bits; | ||
| 178 | } | ||
| 179 | |||
| 180 | static std::pair<size_t, size_t> GetWordPage(VAddr address) { | ||
| 181 | const size_t converted_address = static_cast<size_t>(address); | ||
| 182 | const size_t word_number = converted_address / BYTES_PER_WORD; | ||
| 183 | const size_t amount_pages = converted_address % BYTES_PER_WORD; | ||
| 184 | return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE); | ||
| 185 | } | ||
| 186 | |||
    /**
     * Iterates the bitmap words overlapping the byte range [offset, offset + size),
     * calling func(word_index, mask) where mask has a bit set for every page of that
     * word inside the range. If func returns bool, returning true stops iteration.
     */
    template <typename Func>
    void IterateWords(size_t offset, size_t size, Func&& func) const {
        using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        // Clamp to zero in case the caller's offset arithmetic went negative
        const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
        const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
        if (start >= SizeBytes() || end <= start) {
            return;
        }
        // Round the end up to a page boundary before splitting into (word, page)
        auto [start_word, start_page] = GetWordPage(start);
        auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
        const size_t num_words = NumWords();
        start_word = std::min(start_word, num_words);
        end_word = std::min(end_word, num_words);
        // Re-express end_page relative to start_word so the loop below can simply
        // subtract PAGES_PER_WORD from it per word when building each mask
        const size_t diff = end_word - start_word;
        end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
        end_word = std::min(end_word, num_words);
        end_page += diff * PAGES_PER_WORD;
        constexpr u64 base_mask{~0ULL};
        for (size_t word_index = start_word; word_index < end_word; word_index++) {
            const u64 mask = ExtractBits(base_mask, start_page, end_page);
            start_page = 0; // only the first word may start mid-word
            end_page -= PAGES_PER_WORD;
            if constexpr (BOOL_BREAK) {
                if (func(word_index, mask)) {
                    return;
                }
            } else {
                func(word_index, mask);
            }
        }
    }
| 219 | |||
| 220 | template <typename Func> | ||
| 221 | void IteratePages(u64 mask, Func&& func) const { | ||
| 222 | size_t offset = 0; | ||
| 223 | while (mask != 0) { | ||
| 224 | const size_t empty_bits = std::countr_zero(mask); | ||
| 225 | offset += empty_bits; | ||
| 226 | mask = mask >> empty_bits; | ||
| 227 | |||
| 228 | const size_t continuous_bits = std::countr_one(mask); | ||
| 229 | func(offset, continuous_bits); | ||
| 230 | mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0; | ||
| 231 | offset += continuous_bits; | ||
| 232 | } | ||
| 233 | } | ||
| 234 | |||
    /**
     * Change the state of a range of pages
     *
     * @param dirty_addr Base address to mark or unmark as modified
     * @param size       Size in bytes to mark or unmark as modified
     *
     * @tparam type   Bitmap to update; Untracked and CachedCPU bookkeeping is
     *                handled implicitly for CPU-side updates
     * @tparam enable True sets the bits, false clears them
     */
    template <Type type, bool enable>
    void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
        std::span<u64> state_words = words.template Span<type>();
        [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
        [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
        IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
            if constexpr (type == Type::CPU || type == Type::CachedCPU) {
                // CPU-side tracking changes must be reflected in the rasterizer's
                // write protection (NotifyRasterizer is declared further below)
                NotifyRasterizer<!enable>(index, untracked_words[index], mask);
            }
            if constexpr (enable) {
                state_words[index] |= mask;
                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
                    untracked_words[index] |= mask;
                }
                if constexpr (type == Type::CPU) {
                    // A direct CPU write supersedes any pending cached write
                    cached_words[index] &= ~mask;
                }
            } else {
                if constexpr (type == Type::CPU) {
                    // Only drop cached bits for pages that were actually set
                    const u64 word = state_words[index] & mask;
                    cached_words[index] &= ~word;
                }
                state_words[index] &= ~mask;
                if constexpr (type == Type::CPU || type == Type::CachedCPU) {
                    untracked_words[index] &= ~mask;
                }
            }
        });
    }
| 270 | |||
| 271 | /** | ||
| 272 | * Loop over each page in the given range, turn off those bits and notify the rasterizer if | ||
| 273 | * needed. Call the given function on each turned off range. | ||
| 274 | * | ||
| 275 | * @param query_cpu_range Base CPU address to loop over | ||
| 276 | * @param size Size in bytes of the CPU range to loop over | ||
| 277 | * @param func Function to call for each turned off region | ||
| 278 | */ | ||
| 279 | template <Type type, bool clear, typename Func> | ||
| 280 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | ||
| 281 | static_assert(type != Type::Untracked); | ||
| 282 | |||
| 283 | std::span<u64> state_words = words.template Span<type>(); | ||
| 284 | [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>(); | ||
| 285 | [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>(); | ||
| 286 | const size_t offset = query_cpu_range - cpu_addr; | ||
| 287 | bool pending = false; | ||
| 288 | size_t pending_offset{}; | ||
| 289 | size_t pending_pointer{}; | ||
| 290 | const auto release = [&]() { | ||
| 291 | func(cpu_addr + pending_offset * BYTES_PER_PAGE, | ||
| 292 | (pending_pointer - pending_offset) * BYTES_PER_PAGE); | ||
| 293 | }; | ||
| 294 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 295 | const u64 word = state_words[index] & mask; | ||
| 296 | if constexpr (clear) { | ||
| 297 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 298 | NotifyRasterizer<true>(index, untracked_words[index], mask); | ||
| 299 | } | ||
| 300 | state_words[index] &= ~mask; | ||
| 301 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 302 | untracked_words[index] &= ~mask; | ||
| 303 | } | ||
| 304 | if constexpr (type == Type::CPU) { | ||
| 305 | cached_words[index] &= ~word; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | const size_t base_offset = index * PAGES_PER_WORD; | ||
| 309 | IteratePages(word, [&](size_t pages_offset, size_t pages_size) { | ||
| 310 | const auto reset = [&]() { | ||
| 311 | pending_offset = base_offset + pages_offset; | ||
| 312 | pending_pointer = base_offset + pages_offset + pages_size; | ||
| 313 | }; | ||
| 314 | if (!pending) { | ||
| 315 | reset(); | ||
| 316 | pending = true; | ||
| 317 | return; | ||
| 318 | } | ||
| 319 | if (pending_pointer == base_offset + pages_offset) { | ||
| 320 | pending_pointer += pages_size; | ||
| 321 | return; | ||
| 322 | } | ||
| 323 | release(); | ||
| 324 | reset(); | ||
| 325 | }); | ||
| 326 | }); | ||
| 327 | if (pending) { | ||
| 328 | release(); | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | /** | ||
| 333 | * Returns true when a region has been modified | ||
| 334 | * | ||
| 335 | * @param offset Offset in bytes from the start of the buffer | ||
| 336 | * @param size Size in bytes of the region to query for modifications | ||
| 337 | */ | ||
| 338 | template <Type type> | ||
| 339 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||
| 340 | static_assert(type != Type::Untracked); | ||
| 341 | |||
| 342 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 343 | bool result = false; | ||
| 344 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 345 | const u64 word = state_words[index] & mask; | ||
| 346 | if (word != 0) { | ||
| 347 | result = true; | ||
| 348 | return true; | ||
| 349 | } | ||
| 350 | return false; | ||
| 351 | }); | ||
| 352 | return result; | ||
| 353 | } | ||
| 354 | |||
| 355 | /** | ||
| 356 | * Returns a begin end pair with the inclusive modified region | ||
| 357 | * | ||
| 358 | * @param offset Offset in bytes from the start of the buffer | ||
| 359 | * @param size Size in bytes of the region to query for modifications | ||
| 360 | */ | ||
| 361 | template <Type type> | ||
| 362 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | ||
| 363 | static_assert(type != Type::Untracked); | ||
| 364 | const std::span<const u64> state_words = words.template Span<type>(); | ||
| 365 | u64 begin = std::numeric_limits<u64>::max(); | ||
| 366 | u64 end = 0; | ||
| 367 | IterateWords(offset, size, [&](size_t index, u64 mask) { | ||
| 368 | const u64 word = state_words[index] & mask; | ||
| 369 | if (word == 0) { | ||
| 370 | return; | ||
| 371 | } | ||
| 372 | const u64 local_page_begin = std::countr_zero(word); | ||
| 373 | const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); | ||
| 374 | const u64 page_index = index * PAGES_PER_WORD; | ||
| 375 | begin = std::min(begin, page_index + local_page_begin); | ||
| 376 | end = page_index + local_page_end; | ||
| 377 | }); | ||
| 378 | static constexpr std::pair<u64, u64> EMPTY{0, 0}; | ||
| 379 | return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; | ||
| 380 | } | ||
| 381 | |||
| 382 | /// Returns the number of words of the manager | ||
| 383 | [[nodiscard]] size_t NumWords() const noexcept { | ||
| 384 | return words.NumWords(); | ||
| 385 | } | ||
| 386 | |||
| 387 | /// Returns the size in bytes of the manager | ||
| 388 | [[nodiscard]] u64 SizeBytes() const noexcept { | ||
| 389 | return words.size_bytes; | ||
| 390 | } | ||
| 391 | |||
| 392 | /// Returns true when the buffer fits in the small vector optimization | ||
| 393 | [[nodiscard]] bool IsShort() const noexcept { | ||
| 394 | return words.IsShort(); | ||
| 395 | } | ||
| 396 | |||
| 397 | void FlushCachedWrites() noexcept { | ||
| 398 | const u64 num_words = NumWords(); | ||
| 399 | u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 400 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 401 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 402 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 403 | const u64 cached_bits = cached_words[word_index]; | ||
| 404 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 405 | untracked_words[word_index] |= cached_bits; | ||
| 406 | cpu_words[word_index] |= cached_bits; | ||
| 407 | cached_words[word_index] = 0; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | private: | ||
| 412 | template <Type type> | ||
| 413 | u64* Array() noexcept { | ||
| 414 | if constexpr (type == Type::CPU) { | ||
| 415 | return words.cpu.Pointer(IsShort()); | ||
| 416 | } else if constexpr (type == Type::GPU) { | ||
| 417 | return words.gpu.Pointer(IsShort()); | ||
| 418 | } else if constexpr (type == Type::CachedCPU) { | ||
| 419 | return words.cached_cpu.Pointer(IsShort()); | ||
| 420 | } else if constexpr (type == Type::Untracked) { | ||
| 421 | return words.untracked.Pointer(IsShort()); | ||
| 422 | } | ||
| 423 | } | ||
| 424 | |||
| 425 | template <Type type> | ||
| 426 | const u64* Array() const noexcept { | ||
| 427 | if constexpr (type == Type::CPU) { | ||
| 428 | return words.cpu.Pointer(IsShort()); | ||
| 429 | } else if constexpr (type == Type::GPU) { | ||
| 430 | return words.gpu.Pointer(IsShort()); | ||
| 431 | } else if constexpr (type == Type::CachedCPU) { | ||
| 432 | return words.cached_cpu.Pointer(IsShort()); | ||
| 433 | } else if constexpr (type == Type::Untracked) { | ||
| 434 | return words.untracked.Pointer(IsShort()); | ||
| 435 | } | ||
| 436 | } | ||
| 437 | |||
| 438 | /** | ||
| 439 | * Notify rasterizer about changes in the CPU tracking state of a word in the buffer | ||
| 440 | * | ||
| 441 | * @param word_index Index to the word to notify to the rasterizer | ||
| 442 | * @param current_bits Current state of the word | ||
| 443 | * @param new_bits New state of the word | ||
| 444 | * | ||
| 445 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | ||
| 446 | */ | ||
| 447 | template <bool add_to_rasterizer> | ||
| 448 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { | ||
| 449 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | ||
| 450 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | ||
| 451 | IteratePages(changed_bits, [&](size_t offset, size_t size) { | ||
| 452 | rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE, | ||
| 453 | size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1); | ||
| 454 | }); | ||
| 455 | } | ||
| 456 | |||
| 457 | VAddr cpu_addr = 0; | ||
| 458 | RasterizerInterface* rasterizer = nullptr; | ||
| 459 | Words<stack_words> words; | ||
| 460 | }; | ||
| 461 | |||
| 462 | } // namespace VideoCommon | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a8c3f8b67..18d3c3ac0 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 11 | #include "video_core/rasterizer_interface.h" | 12 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 13 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -200,6 +201,8 @@ private: | |||
| 200 | struct BufferCacheParams { | 201 | struct BufferCacheParams { |
| 201 | using Runtime = OpenGL::BufferCacheRuntime; | 202 | using Runtime = OpenGL::BufferCacheRuntime; |
| 202 | using Buffer = OpenGL::Buffer; | 203 | using Buffer = OpenGL::Buffer; |
| 204 | using Async_Buffer = u32; | ||
| 205 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 203 | 206 | ||
| 204 | static constexpr bool IS_OPENGL = true; | 207 | static constexpr bool IS_OPENGL = true; |
| 205 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | 208 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; |
| @@ -208,6 +211,7 @@ struct BufferCacheParams { | |||
| 208 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 211 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 209 | static constexpr bool USE_MEMORY_MAPS = false; | 212 | static constexpr bool USE_MEMORY_MAPS = false; |
| 210 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | 213 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; |
| 214 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; | ||
| 211 | }; | 215 | }; |
| 212 | 216 | ||
| 213 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 217 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp new file mode 100644 index 000000000..f15ae8e25 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>; | ||
| 9 | } | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2a8d9e377..908625c66 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -93,8 +93,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 93 | state_tracker(), scheduler(device, state_tracker), | 93 | state_tracker(), scheduler(device, state_tracker), |
| 94 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | 94 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, |
| 95 | render_window.GetFramebufferLayout().height, false), | 95 | render_window.GetFramebufferLayout().height, false), |
| 96 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | 96 | present_manager(render_window, device, memory_allocator, scheduler, swapchain), |
| 97 | screen_info), | 97 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager, |
| 98 | scheduler, screen_info), | ||
| 98 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | 99 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, |
| 99 | state_tracker, scheduler) { | 100 | state_tracker, scheduler) { |
| 100 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { | 101 | if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { |
| @@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 121 | return; | 122 | return; |
| 122 | } | 123 | } |
| 123 | // Update screen info if the framebuffer size has changed. | 124 | // Update screen info if the framebuffer size has changed. |
| 124 | if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) { | 125 | screen_info.width = framebuffer->width; |
| 125 | screen_info.width = framebuffer->width; | 126 | screen_info.height = framebuffer->height; |
| 126 | screen_info.height = framebuffer->height; | 127 | |
| 127 | } | ||
| 128 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 128 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 129 | const bool use_accelerated = | 129 | const bool use_accelerated = |
| 130 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 130 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 131 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 131 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 132 | RenderScreenshot(*framebuffer, use_accelerated); | 132 | RenderScreenshot(*framebuffer, use_accelerated); |
| 133 | 133 | ||
| 134 | bool has_been_recreated = false; | 134 | Frame* frame = present_manager.GetRenderFrame(); |
| 135 | const auto recreate_swapchain = [&](u32 width, u32 height) { | 135 | blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); |
| 136 | if (!has_been_recreated) { | 136 | scheduler.Flush(*frame->render_ready); |
| 137 | has_been_recreated = true; | 137 | present_manager.Present(frame); |
| 138 | scheduler.Finish(); | ||
| 139 | } | ||
| 140 | swapchain.Create(width, height, is_srgb); | ||
| 141 | }; | ||
| 142 | |||
| 143 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); | ||
| 144 | if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width || | ||
| 145 | swapchain.GetHeight() != layout.height) { | ||
| 146 | recreate_swapchain(layout.width, layout.height); | ||
| 147 | } | ||
| 148 | bool is_outdated; | ||
| 149 | do { | ||
| 150 | swapchain.AcquireNextImage(); | ||
| 151 | is_outdated = swapchain.IsOutDated(); | ||
| 152 | if (is_outdated) { | ||
| 153 | recreate_swapchain(layout.width, layout.height); | ||
| 154 | } | ||
| 155 | } while (is_outdated); | ||
| 156 | if (has_been_recreated) { | ||
| 157 | blit_screen.Recreate(); | ||
| 158 | } | ||
| 159 | const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated); | ||
| 160 | const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); | ||
| 161 | scheduler.Flush(render_semaphore, present_semaphore); | ||
| 162 | scheduler.WaitWorker(); | ||
| 163 | swapchain.Present(render_semaphore); | ||
| 164 | 138 | ||
| 165 | gpu.RendererFrameEndNotify(); | 139 | gpu.RendererFrameEndNotify(); |
| 166 | rasterizer.TickFrame(); | 140 | rasterizer.TickFrame(); |
| @@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 246 | }); | 220 | }); |
| 247 | const VkExtent2D render_area{.width = layout.width, .height = layout.height}; | 221 | const VkExtent2D render_area{.width = layout.width, .height = layout.height}; |
| 248 | const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); | 222 | const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); |
| 249 | // Since we're not rendering to the screen, ignore the render semaphore. | 223 | blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated); |
| 250 | void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated)); | ||
| 251 | 224 | ||
| 252 | const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); | 225 | const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); |
| 253 | const VkBufferCreateInfo dst_buffer_info{ | 226 | const VkBufferCreateInfo dst_buffer_info{ |
| @@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 270 | .pNext = nullptr, | 243 | .pNext = nullptr, |
| 271 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | 244 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, |
| 272 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | 245 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 273 | .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, | 246 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 274 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | 247 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, |
| 275 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 248 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 276 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 249 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 009e75e0d..f44367cb2 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/dynamic_library.h" | 9 | #include "common/dynamic_library.h" |
| 10 | #include "video_core/renderer_base.h" | 10 | #include "video_core/renderer_base.h" |
| 11 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 11 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 12 | #include "video_core/renderer_vulkan/vk_present_manager.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | 13 | #include "video_core/renderer_vulkan/vk_rasterizer.h" |
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 15 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| @@ -76,6 +77,7 @@ private: | |||
| 76 | StateTracker state_tracker; | 77 | StateTracker state_tracker; |
| 77 | Scheduler scheduler; | 78 | Scheduler scheduler; |
| 78 | Swapchain swapchain; | 79 | Swapchain swapchain; |
| 80 | PresentManager present_manager; | ||
| 79 | BlitScreen blit_screen; | 81 | BlitScreen blit_screen; |
| 80 | RasterizerVulkan rasterizer; | 82 | RasterizerVulkan rasterizer; |
| 81 | std::optional<TurboMode> turbo_mode; | 83 | std::optional<TurboMode> turbo_mode; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 2f0cc27e8..1e0fdd3d9 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -122,10 +122,12 @@ struct BlitScreen::BufferData { | |||
| 122 | 122 | ||
| 123 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, | 123 | BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, |
| 124 | const Device& device_, MemoryAllocator& memory_allocator_, | 124 | const Device& device_, MemoryAllocator& memory_allocator_, |
| 125 | Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_) | 125 | Swapchain& swapchain_, PresentManager& present_manager_, |
| 126 | Scheduler& scheduler_, const ScreenInfo& screen_info_) | ||
| 126 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, | 127 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, |
| 127 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, | 128 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, |
| 128 | image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 129 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_}, |
| 130 | current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} { | ||
| 129 | resource_ticks.resize(image_count); | 131 | resource_ticks.resize(image_count); |
| 130 | 132 | ||
| 131 | CreateStaticResources(); | 133 | CreateStaticResources(); |
| @@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin | |||
| 135 | BlitScreen::~BlitScreen() = default; | 137 | BlitScreen::~BlitScreen() = default; |
| 136 | 138 | ||
| 137 | void BlitScreen::Recreate() { | 139 | void BlitScreen::Recreate() { |
| 140 | present_manager.WaitPresent(); | ||
| 141 | scheduler.Finish(); | ||
| 142 | device.GetLogical().WaitIdle(); | ||
| 138 | CreateDynamicResources(); | 143 | CreateDynamicResources(); |
| 139 | } | 144 | } |
| 140 | 145 | ||
| 141 | VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | 146 | void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, |
| 142 | const VkFramebuffer& host_framebuffer, | 147 | const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, |
| 143 | const Layout::FramebufferLayout layout, VkExtent2D render_area, | 148 | VkExtent2D render_area, bool use_accelerated) { |
| 144 | bool use_accelerated) { | ||
| 145 | RefreshResources(framebuffer); | 149 | RefreshResources(framebuffer); |
| 146 | 150 | ||
| 147 | // Finish any pending renderpass | 151 | // Finish any pending renderpass |
| 148 | scheduler.RequestOutsideRenderPassOperationContext(); | 152 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 149 | 153 | ||
| 150 | if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) { | ||
| 151 | image_count = swapchain_images; | ||
| 152 | Recreate(); | ||
| 153 | } | ||
| 154 | |||
| 155 | const std::size_t image_index = swapchain.GetImageIndex(); | ||
| 156 | |||
| 157 | scheduler.Wait(resource_ticks[image_index]); | 154 | scheduler.Wait(resource_ticks[image_index]); |
| 158 | resource_ticks[image_index] = scheduler.CurrentTick(); | 155 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 159 | 156 | ||
| @@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 169 | std::memcpy(mapped_span.data(), &data, sizeof(data)); | 166 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 170 | 167 | ||
| 171 | if (!use_accelerated) { | 168 | if (!use_accelerated) { |
| 172 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 169 | const u64 image_offset = GetRawImageOffset(framebuffer); |
| 173 | 170 | ||
| 174 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 171 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 175 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 172 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| @@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 204 | .depth = 1, | 201 | .depth = 1, |
| 205 | }, | 202 | }, |
| 206 | }; | 203 | }; |
| 207 | scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { | 204 | scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { |
| 208 | const VkImage image = *raw_images[image_index]; | 205 | const VkImage image = *raw_images[index]; |
| 209 | const VkImageMemoryBarrier base_barrier{ | 206 | const VkImageMemoryBarrier base_barrier{ |
| 210 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 207 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 211 | .pNext = nullptr, | 208 | .pNext = nullptr, |
| @@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 245 | 242 | ||
| 246 | const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); | 243 | const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); |
| 247 | if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { | 244 | if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { |
| 248 | UpdateAADescriptorSet(image_index, source_image_view, false); | 245 | UpdateAADescriptorSet(source_image_view, false); |
| 249 | const u32 up_scale = Settings::values.resolution_info.up_scale; | 246 | const u32 up_scale = Settings::values.resolution_info.up_scale; |
| 250 | const u32 down_shift = Settings::values.resolution_info.down_shift; | 247 | const u32 down_shift = Settings::values.resolution_info.down_shift; |
| 251 | VkExtent2D size{ | 248 | VkExtent2D size{ |
| 252 | .width = (up_scale * framebuffer.width) >> down_shift, | 249 | .width = (up_scale * framebuffer.width) >> down_shift, |
| 253 | .height = (up_scale * framebuffer.height) >> down_shift, | 250 | .height = (up_scale * framebuffer.height) >> down_shift, |
| 254 | }; | 251 | }; |
| 255 | scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { | 252 | scheduler.Record([this, index = image_index, size, |
| 253 | anti_alias_pass](vk::CommandBuffer cmdbuf) { | ||
| 256 | const VkImageMemoryBarrier base_barrier{ | 254 | const VkImageMemoryBarrier base_barrier{ |
| 257 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 255 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 258 | .pNext = nullptr, | 256 | .pNext = nullptr, |
| @@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 326 | 324 | ||
| 327 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); | 325 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 328 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, | 326 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, |
| 329 | aa_descriptor_sets[image_index], {}); | 327 | aa_descriptor_sets[index], {}); |
| 330 | cmdbuf.Draw(4, 1, 0, 0); | 328 | cmdbuf.Draw(4, 1, 0, 0); |
| 331 | cmdbuf.EndRenderPass(); | 329 | cmdbuf.EndRenderPass(); |
| 332 | 330 | ||
| @@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 369 | }; | 367 | }; |
| 370 | VkImageView fsr_image_view = | 368 | VkImageView fsr_image_view = |
| 371 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); | 369 | fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); |
| 372 | UpdateDescriptorSet(image_index, fsr_image_view, true); | 370 | UpdateDescriptorSet(fsr_image_view, true); |
| 373 | } else { | 371 | } else { |
| 374 | const bool is_nn = | 372 | const bool is_nn = |
| 375 | Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; | 373 | Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; |
| 376 | UpdateDescriptorSet(image_index, source_image_view, is_nn); | 374 | UpdateDescriptorSet(source_image_view, is_nn); |
| 377 | } | 375 | } |
| 378 | 376 | ||
| 379 | scheduler.Record( | 377 | scheduler.Record([this, host_framebuffer, index = image_index, |
| 380 | [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { | 378 | size = render_area](vk::CommandBuffer cmdbuf) { |
| 381 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; | 379 | const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; |
| 382 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; | 380 | const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; |
| 383 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; | 381 | const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; |
| 384 | const VkClearValue clear_color{ | 382 | const VkClearValue clear_color{ |
| 385 | .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, | 383 | .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, |
| 386 | }; | 384 | }; |
| 387 | const VkRenderPassBeginInfo renderpass_bi{ | 385 | const VkRenderPassBeginInfo renderpass_bi{ |
| 388 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, | 386 | .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, |
| 389 | .pNext = nullptr, | 387 | .pNext = nullptr, |
| 390 | .renderPass = *renderpass, | 388 | .renderPass = *renderpass, |
| 391 | .framebuffer = host_framebuffer, | 389 | .framebuffer = host_framebuffer, |
| 392 | .renderArea = | 390 | .renderArea = |
| 393 | { | 391 | { |
| 394 | .offset = {0, 0}, | 392 | .offset = {0, 0}, |
| 395 | .extent = size, | 393 | .extent = size, |
| 396 | }, | 394 | }, |
| 397 | .clearValueCount = 1, | 395 | .clearValueCount = 1, |
| 398 | .pClearValues = &clear_color, | 396 | .pClearValues = &clear_color, |
| 399 | }; | 397 | }; |
| 400 | const VkViewport viewport{ | 398 | const VkViewport viewport{ |
| 401 | .x = 0.0f, | 399 | .x = 0.0f, |
| 402 | .y = 0.0f, | 400 | .y = 0.0f, |
| 403 | .width = static_cast<float>(size.width), | 401 | .width = static_cast<float>(size.width), |
| 404 | .height = static_cast<float>(size.height), | 402 | .height = static_cast<float>(size.height), |
| 405 | .minDepth = 0.0f, | 403 | .minDepth = 0.0f, |
| 406 | .maxDepth = 1.0f, | 404 | .maxDepth = 1.0f, |
| 407 | }; | 405 | }; |
| 408 | const VkRect2D scissor{ | 406 | const VkRect2D scissor{ |
| 409 | .offset = {0, 0}, | 407 | .offset = {0, 0}, |
| 410 | .extent = size, | 408 | .extent = size, |
| 411 | }; | 409 | }; |
| 412 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | 410 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 413 | auto graphics_pipeline = [this]() { | 411 | auto graphics_pipeline = [this]() { |
| 414 | switch (Settings::values.scaling_filter.GetValue()) { | 412 | switch (Settings::values.scaling_filter.GetValue()) { |
| 415 | case Settings::ScalingFilter::NearestNeighbor: | 413 | case Settings::ScalingFilter::NearestNeighbor: |
| 416 | case Settings::ScalingFilter::Bilinear: | 414 | case Settings::ScalingFilter::Bilinear: |
| 417 | return *bilinear_pipeline; | 415 | return *bilinear_pipeline; |
| 418 | case Settings::ScalingFilter::Bicubic: | 416 | case Settings::ScalingFilter::Bicubic: |
| 419 | return *bicubic_pipeline; | 417 | return *bicubic_pipeline; |
| 420 | case Settings::ScalingFilter::Gaussian: | 418 | case Settings::ScalingFilter::Gaussian: |
| 421 | return *gaussian_pipeline; | 419 | return *gaussian_pipeline; |
| 422 | case Settings::ScalingFilter::ScaleForce: | 420 | case Settings::ScalingFilter::ScaleForce: |
| 423 | return *scaleforce_pipeline; | 421 | return *scaleforce_pipeline; |
| 424 | default: | 422 | default: |
| 425 | return *bilinear_pipeline; | 423 | return *bilinear_pipeline; |
| 426 | } | 424 | } |
| 427 | }(); | 425 | }(); |
| 428 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); | 426 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); |
| 429 | cmdbuf.SetViewport(0, viewport); | 427 | cmdbuf.SetViewport(0, viewport); |
| 430 | cmdbuf.SetScissor(0, scissor); | 428 | cmdbuf.SetScissor(0, scissor); |
| 431 | 429 | ||
| 432 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); | 430 | cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); |
| 433 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | 431 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, |
| 434 | descriptor_sets[image_index], {}); | 432 | descriptor_sets[index], {}); |
| 435 | cmdbuf.Draw(4, 1, 0, 0); | 433 | cmdbuf.Draw(4, 1, 0, 0); |
| 436 | cmdbuf.EndRenderPass(); | 434 | cmdbuf.EndRenderPass(); |
| 437 | }); | 435 | }); |
| 438 | return *semaphores[image_index]; | ||
| 439 | } | 436 | } |
| 440 | 437 | ||
| 441 | VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, | 438 | void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, |
| 442 | bool use_accelerated) { | 439 | bool use_accelerated, bool is_srgb) { |
| 443 | const std::size_t image_index = swapchain.GetImageIndex(); | 440 | // Recreate dynamic resources if the the image count or colorspace changed |
| 444 | const VkExtent2D render_area = swapchain.GetSize(); | 441 | if (const std::size_t swapchain_images = swapchain.GetImageCount(); |
| 442 | swapchain_images != image_count || current_srgb != is_srgb) { | ||
| 443 | current_srgb = is_srgb; | ||
| 444 | image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; | ||
| 445 | image_count = swapchain_images; | ||
| 446 | Recreate(); | ||
| 447 | } | ||
| 448 | |||
| 449 | // Recreate the presentation frame if the dimensions of the window changed | ||
| 445 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); | 450 | const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); |
| 446 | return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated); | 451 | if (layout.width != frame->width || layout.height != frame->height || |
| 452 | is_srgb != frame->is_srgb) { | ||
| 453 | Recreate(); | ||
| 454 | present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb, | ||
| 455 | image_view_format, *renderpass); | ||
| 456 | } | ||
| 457 | |||
| 458 | const VkExtent2D render_area{frame->width, frame->height}; | ||
| 459 | Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated); | ||
| 460 | if (++image_index >= image_count) { | ||
| 461 | image_index = 0; | ||
| 462 | } | ||
| 447 | } | 463 | } |
| 448 | 464 | ||
| 449 | vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { | 465 | vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { |
| @@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() { | |||
| 471 | } | 487 | } |
| 472 | 488 | ||
| 473 | void BlitScreen::CreateDynamicResources() { | 489 | void BlitScreen::CreateDynamicResources() { |
| 474 | CreateSemaphores(); | ||
| 475 | CreateDescriptorPool(); | 490 | CreateDescriptorPool(); |
| 476 | CreateDescriptorSetLayout(); | 491 | CreateDescriptorSetLayout(); |
| 477 | CreateDescriptorSets(); | 492 | CreateDescriptorSets(); |
| 478 | CreatePipelineLayout(); | 493 | CreatePipelineLayout(); |
| 479 | CreateRenderPass(); | 494 | CreateRenderPass(); |
| 480 | CreateFramebuffers(); | ||
| 481 | CreateGraphicsPipeline(); | 495 | CreateGraphicsPipeline(); |
| 482 | fsr.reset(); | 496 | fsr.reset(); |
| 483 | smaa.reset(); | 497 | smaa.reset(); |
| @@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() { | |||
| 525 | } | 539 | } |
| 526 | } | 540 | } |
| 527 | 541 | ||
| 528 | void BlitScreen::CreateSemaphores() { | ||
| 529 | semaphores.resize(image_count); | ||
| 530 | std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); }); | ||
| 531 | } | ||
| 532 | |||
| 533 | void BlitScreen::CreateDescriptorPool() { | 542 | void BlitScreen::CreateDescriptorPool() { |
| 534 | const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ | 543 | const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ |
| 535 | { | 544 | { |
| @@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() { | |||
| 571 | } | 580 | } |
| 572 | 581 | ||
| 573 | void BlitScreen::CreateRenderPass() { | 582 | void BlitScreen::CreateRenderPass() { |
| 574 | renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); | 583 | renderpass = CreateRenderPassImpl(image_view_format); |
| 575 | } | 584 | } |
| 576 | 585 | ||
| 577 | vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { | 586 | vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) { |
| 578 | const VkAttachmentDescription color_attachment{ | 587 | const VkAttachmentDescription color_attachment{ |
| 579 | .flags = 0, | 588 | .flags = 0, |
| 580 | .format = format, | 589 | .format = format, |
| @@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present | |||
| 584 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, | 593 | .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, |
| 585 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, | 594 | .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, |
| 586 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 595 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 587 | .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, | 596 | .finalLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 588 | }; | 597 | }; |
| 589 | 598 | ||
| 590 | const VkAttachmentReference color_attachment_ref{ | 599 | const VkAttachmentReference color_attachment_ref{ |
| @@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() { | |||
| 1052 | nn_sampler = device.GetLogical().CreateSampler(ci_nn); | 1061 | nn_sampler = device.GetLogical().CreateSampler(ci_nn); |
| 1053 | } | 1062 | } |
| 1054 | 1063 | ||
| 1055 | void BlitScreen::CreateFramebuffers() { | ||
| 1056 | const VkExtent2D size{swapchain.GetSize()}; | ||
| 1057 | framebuffers.resize(image_count); | ||
| 1058 | |||
| 1059 | for (std::size_t i = 0; i < image_count; ++i) { | ||
| 1060 | const VkImageView image_view{swapchain.GetImageViewIndex(i)}; | ||
| 1061 | framebuffers[i] = CreateFramebuffer(image_view, size, renderpass); | ||
| 1062 | } | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | void BlitScreen::ReleaseRawImages() { | 1064 | void BlitScreen::ReleaseRawImages() { |
| 1066 | for (const u64 tick : resource_ticks) { | 1065 | for (const u64 tick : resource_ticks) { |
| 1067 | scheduler.Wait(tick); | 1066 | scheduler.Wait(tick); |
| @@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1175 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | 1174 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); |
| 1176 | return; | 1175 | return; |
| 1177 | } | 1176 | } |
| 1178 | aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); | 1177 | aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer)); |
| 1179 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); | 1178 | aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); |
| 1180 | 1179 | ||
| 1181 | const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ | 1180 | const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ |
| @@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1319 | aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); | 1318 | aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); |
| 1320 | } | 1319 | } |
| 1321 | 1320 | ||
| 1322 | void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, | 1321 | void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const { |
| 1323 | bool nn) const { | ||
| 1324 | const VkDescriptorImageInfo image_info{ | 1322 | const VkDescriptorImageInfo image_info{ |
| 1325 | .sampler = nn ? *nn_sampler : *sampler, | 1323 | .sampler = nn ? *nn_sampler : *sampler, |
| 1326 | .imageView = image_view, | 1324 | .imageView = image_view, |
| @@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag | |||
| 1356 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); | 1354 | device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); |
| 1357 | } | 1355 | } |
| 1358 | 1356 | ||
| 1359 | void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, | 1357 | void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { |
| 1360 | bool nn) const { | ||
| 1361 | const VkDescriptorBufferInfo buffer_info{ | 1358 | const VkDescriptorBufferInfo buffer_info{ |
| 1362 | .buffer = *buffer, | 1359 | .buffer = *buffer, |
| 1363 | .offset = offsetof(BufferData, uniform), | 1360 | .offset = offsetof(BufferData, uniform), |
| @@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) | |||
| 1480 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; | 1477 | return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; |
| 1481 | } | 1478 | } |
| 1482 | 1479 | ||
| 1483 | u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, | 1480 | u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { |
| 1484 | std::size_t image_index) const { | ||
| 1485 | constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); | 1481 | constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); |
| 1486 | return first_image_offset + GetSizeInBytes(framebuffer) * image_index; | 1482 | return first_image_offset + GetSizeInBytes(framebuffer) * image_index; |
| 1487 | } | 1483 | } |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ebe10b08b..68ec20253 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | 7 | ||
| 8 | #include "core/frontend/framebuffer_layout.h" | ||
| 8 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 9 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 10 | 11 | ||
| @@ -42,6 +43,9 @@ class RasterizerVulkan; | |||
| 42 | class Scheduler; | 43 | class Scheduler; |
| 43 | class SMAA; | 44 | class SMAA; |
| 44 | class Swapchain; | 45 | class Swapchain; |
| 46 | class PresentManager; | ||
| 47 | |||
| 48 | struct Frame; | ||
| 45 | 49 | ||
| 46 | struct ScreenInfo { | 50 | struct ScreenInfo { |
| 47 | VkImage image{}; | 51 | VkImage image{}; |
| @@ -55,18 +59,17 @@ class BlitScreen { | |||
| 55 | public: | 59 | public: |
| 56 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, | 60 | explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, |
| 57 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, | 61 | const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, |
| 58 | Scheduler& scheduler, const ScreenInfo& screen_info); | 62 | PresentManager& present_manager, Scheduler& scheduler, |
| 63 | const ScreenInfo& screen_info); | ||
| 59 | ~BlitScreen(); | 64 | ~BlitScreen(); |
| 60 | 65 | ||
| 61 | void Recreate(); | 66 | void Recreate(); |
| 62 | 67 | ||
| 63 | [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, | 68 | void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, |
| 64 | const VkFramebuffer& host_framebuffer, | 69 | const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); |
| 65 | const Layout::FramebufferLayout layout, VkExtent2D render_area, | ||
| 66 | bool use_accelerated); | ||
| 67 | 70 | ||
| 68 | [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, | 71 | void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, |
| 69 | bool use_accelerated); | 72 | bool use_accelerated, bool is_srgb); |
| 70 | 73 | ||
| 71 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, | 74 | [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, |
| 72 | VkExtent2D extent); | 75 | VkExtent2D extent); |
| @@ -79,10 +82,9 @@ private: | |||
| 79 | 82 | ||
| 80 | void CreateStaticResources(); | 83 | void CreateStaticResources(); |
| 81 | void CreateShaders(); | 84 | void CreateShaders(); |
| 82 | void CreateSemaphores(); | ||
| 83 | void CreateDescriptorPool(); | 85 | void CreateDescriptorPool(); |
| 84 | void CreateRenderPass(); | 86 | void CreateRenderPass(); |
| 85 | vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); | 87 | vk::RenderPass CreateRenderPassImpl(VkFormat format); |
| 86 | void CreateDescriptorSetLayout(); | 88 | void CreateDescriptorSetLayout(); |
| 87 | void CreateDescriptorSets(); | 89 | void CreateDescriptorSets(); |
| 88 | void CreatePipelineLayout(); | 90 | void CreatePipelineLayout(); |
| @@ -90,15 +92,14 @@ private: | |||
| 90 | void CreateSampler(); | 92 | void CreateSampler(); |
| 91 | 93 | ||
| 92 | void CreateDynamicResources(); | 94 | void CreateDynamicResources(); |
| 93 | void CreateFramebuffers(); | ||
| 94 | 95 | ||
| 95 | void RefreshResources(const Tegra::FramebufferConfig& framebuffer); | 96 | void RefreshResources(const Tegra::FramebufferConfig& framebuffer); |
| 96 | void ReleaseRawImages(); | 97 | void ReleaseRawImages(); |
| 97 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); | 98 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); |
| 98 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); | 99 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); |
| 99 | 100 | ||
| 100 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; | 101 | void UpdateDescriptorSet(VkImageView image_view, bool nn) const; |
| 101 | void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; | 102 | void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; |
| 102 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; | 103 | void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; |
| 103 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, | 104 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, |
| 104 | const Layout::FramebufferLayout layout) const; | 105 | const Layout::FramebufferLayout layout) const; |
| @@ -107,16 +108,17 @@ private: | |||
| 107 | void CreateFSR(); | 108 | void CreateFSR(); |
| 108 | 109 | ||
| 109 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; | 110 | u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; |
| 110 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, | 111 | u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; |
| 111 | std::size_t image_index) const; | ||
| 112 | 112 | ||
| 113 | Core::Memory::Memory& cpu_memory; | 113 | Core::Memory::Memory& cpu_memory; |
| 114 | Core::Frontend::EmuWindow& render_window; | 114 | Core::Frontend::EmuWindow& render_window; |
| 115 | const Device& device; | 115 | const Device& device; |
| 116 | MemoryAllocator& memory_allocator; | 116 | MemoryAllocator& memory_allocator; |
| 117 | Swapchain& swapchain; | 117 | Swapchain& swapchain; |
| 118 | PresentManager& present_manager; | ||
| 118 | Scheduler& scheduler; | 119 | Scheduler& scheduler; |
| 119 | std::size_t image_count; | 120 | std::size_t image_count; |
| 121 | std::size_t image_index{}; | ||
| 120 | const ScreenInfo& screen_info; | 122 | const ScreenInfo& screen_info; |
| 121 | 123 | ||
| 122 | vk::ShaderModule vertex_shader; | 124 | vk::ShaderModule vertex_shader; |
| @@ -135,7 +137,6 @@ private: | |||
| 135 | vk::Pipeline gaussian_pipeline; | 137 | vk::Pipeline gaussian_pipeline; |
| 136 | vk::Pipeline scaleforce_pipeline; | 138 | vk::Pipeline scaleforce_pipeline; |
| 137 | vk::RenderPass renderpass; | 139 | vk::RenderPass renderpass; |
| 138 | std::vector<vk::Framebuffer> framebuffers; | ||
| 139 | vk::DescriptorSets descriptor_sets; | 140 | vk::DescriptorSets descriptor_sets; |
| 140 | vk::Sampler nn_sampler; | 141 | vk::Sampler nn_sampler; |
| 141 | vk::Sampler sampler; | 142 | vk::Sampler sampler; |
| @@ -145,7 +146,6 @@ private: | |||
| 145 | 146 | ||
| 146 | std::vector<u64> resource_ticks; | 147 | std::vector<u64> resource_ticks; |
| 147 | 148 | ||
| 148 | std::vector<vk::Semaphore> semaphores; | ||
| 149 | std::vector<vk::Image> raw_images; | 149 | std::vector<vk::Image> raw_images; |
| 150 | std::vector<vk::ImageView> raw_image_views; | 150 | std::vector<vk::ImageView> raw_image_views; |
| 151 | std::vector<MemoryCommit> raw_buffer_commits; | 151 | std::vector<MemoryCommit> raw_buffer_commits; |
| @@ -164,6 +164,8 @@ private: | |||
| 164 | u32 raw_width = 0; | 164 | u32 raw_width = 0; |
| 165 | u32 raw_height = 0; | 165 | u32 raw_height = 0; |
| 166 | Service::android::PixelFormat pixel_format{}; | 166 | Service::android::PixelFormat pixel_format{}; |
| 167 | bool current_srgb; | ||
| 168 | VkFormat image_view_format; | ||
| 167 | 169 | ||
| 168 | std::unique_ptr<FSR> fsr; | 170 | std::unique_ptr<FSR> fsr; |
| 169 | std::unique_ptr<SMAA> smaa; | 171 | std::unique_ptr<SMAA> smaa; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 9cbcb3c8f..510602e8e 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | |||
| 314 | return staging_pool.Request(size, MemoryUsage::Upload); | 314 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 315 | } | 315 | } |
| 316 | 316 | ||
| 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { | 317 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { |
| 318 | return staging_pool.Request(size, MemoryUsage::Download); | 318 | return staging_pool.Request(size, MemoryUsage::Download, deferred); |
| 319 | } | ||
| 320 | |||
| 321 | void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { | ||
| 322 | staging_pool.FreeDeferred(ref); | ||
| 319 | } | 323 | } |
| 320 | 324 | ||
| 321 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { | 325 | u64 BufferCacheRuntime::GetDeviceLocalMemory() const { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 183b33632..879f1ed94 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -3,7 +3,8 @@ | |||
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include "video_core/buffer_cache/buffer_cache.h" | 6 | #include "video_core/buffer_cache/buffer_cache_base.h" |
| 7 | #include "video_core/buffer_cache/memory_tracker_base.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 9 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -75,7 +76,9 @@ public: | |||
| 75 | 76 | ||
| 76 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | 77 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 77 | 78 | ||
| 78 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | 79 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); |
| 80 | |||
| 81 | void FreeDeferredStagingBuffer(StagingBufferRef& ref); | ||
| 79 | 82 | ||
| 80 | void PreCopyBarrier(); | 83 | void PreCopyBarrier(); |
| 81 | 84 | ||
| @@ -142,6 +145,8 @@ private: | |||
| 142 | struct BufferCacheParams { | 145 | struct BufferCacheParams { |
| 143 | using Runtime = Vulkan::BufferCacheRuntime; | 146 | using Runtime = Vulkan::BufferCacheRuntime; |
| 144 | using Buffer = Vulkan::Buffer; | 147 | using Buffer = Vulkan::Buffer; |
| 148 | using Async_Buffer = Vulkan::StagingBufferRef; | ||
| 149 | using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; | ||
| 145 | 150 | ||
| 146 | static constexpr bool IS_OPENGL = false; | 151 | static constexpr bool IS_OPENGL = false; |
| 147 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | 152 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; |
| @@ -150,6 +155,7 @@ struct BufferCacheParams { | |||
| 150 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 155 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 151 | static constexpr bool USE_MEMORY_MAPS = true; | 156 | static constexpr bool USE_MEMORY_MAPS = true; |
| 152 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | 157 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; |
| 158 | static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; | ||
| 153 | }; | 159 | }; |
| 154 | 160 | ||
| 155 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 161 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp new file mode 100644 index 000000000..f9e271507 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 5 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>; | ||
| 9 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 985cc3203..a318d643e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -696,6 +696,13 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 696 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | 696 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 697 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, | 697 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 698 | PipelineStatistics* statistics, bool build_in_parallel) try { | 698 | PipelineStatistics* statistics, bool build_in_parallel) try { |
| 699 | // TODO: Remove this when Intel fixes their shader compiler. | ||
| 700 | // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159 | ||
| 701 | if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | ||
| 702 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); | ||
| 703 | return nullptr; | ||
| 704 | } | ||
| 705 | |||
| 699 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 706 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 700 | 707 | ||
| 701 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 708 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp new file mode 100644 index 000000000..c49583013 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp | |||
| @@ -0,0 +1,457 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/microprofile.h" | ||
| 5 | #include "common/settings.h" | ||
| 6 | #include "common/thread.h" | ||
| 7 | #include "video_core/renderer_vulkan/vk_present_manager.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_swapchain.h" | ||
| 10 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 11 | |||
| 12 | namespace Vulkan { | ||
| 13 | |||
| 14 | MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); | ||
| 15 | MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192)); | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) { | ||
| 20 | const VkFormatProperties props{physical_device.GetFormatProperties(format)}; | ||
| 21 | return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT); | ||
| 22 | } | ||
| 23 | |||
| 24 | [[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() { | ||
| 25 | return VkImageSubresourceLayers{ | ||
| 26 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 27 | .mipLevel = 0, | ||
| 28 | .baseArrayLayer = 0, | ||
| 29 | .layerCount = 1, | ||
| 30 | }; | ||
| 31 | } | ||
| 32 | |||
| 33 | [[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width, | ||
| 34 | s32 swapchain_height) { | ||
| 35 | return VkImageBlit{ | ||
| 36 | .srcSubresource = MakeImageSubresourceLayers(), | ||
| 37 | .srcOffsets = | ||
| 38 | { | ||
| 39 | { | ||
| 40 | .x = 0, | ||
| 41 | .y = 0, | ||
| 42 | .z = 0, | ||
| 43 | }, | ||
| 44 | { | ||
| 45 | .x = frame_width, | ||
| 46 | .y = frame_height, | ||
| 47 | .z = 1, | ||
| 48 | }, | ||
| 49 | }, | ||
| 50 | .dstSubresource = MakeImageSubresourceLayers(), | ||
| 51 | .dstOffsets = | ||
| 52 | { | ||
| 53 | { | ||
| 54 | .x = 0, | ||
| 55 | .y = 0, | ||
| 56 | .z = 0, | ||
| 57 | }, | ||
| 58 | { | ||
| 59 | .x = swapchain_width, | ||
| 60 | .y = swapchain_height, | ||
| 61 | .z = 1, | ||
| 62 | }, | ||
| 63 | }, | ||
| 64 | }; | ||
| 65 | } | ||
| 66 | |||
| 67 | [[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width, | ||
| 68 | u32 swapchain_height) { | ||
| 69 | return VkImageCopy{ | ||
| 70 | .srcSubresource = MakeImageSubresourceLayers(), | ||
| 71 | .srcOffset = | ||
| 72 | { | ||
| 73 | .x = 0, | ||
| 74 | .y = 0, | ||
| 75 | .z = 0, | ||
| 76 | }, | ||
| 77 | .dstSubresource = MakeImageSubresourceLayers(), | ||
| 78 | .dstOffset = | ||
| 79 | { | ||
| 80 | .x = 0, | ||
| 81 | .y = 0, | ||
| 82 | .z = 0, | ||
| 83 | }, | ||
| 84 | .extent = | ||
| 85 | { | ||
| 86 | .width = std::min(frame_width, swapchain_width), | ||
| 87 | .height = std::min(frame_height, swapchain_height), | ||
| 88 | .depth = 1, | ||
| 89 | }, | ||
| 90 | }; | ||
| 91 | } | ||
| 92 | |||
| 93 | } // Anonymous namespace | ||
| 94 | |||
| 95 | PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_, | ||
| 96 | MemoryAllocator& memory_allocator_, Scheduler& scheduler_, | ||
| 97 | Swapchain& swapchain_) | ||
| 98 | : render_window{render_window_}, device{device_}, | ||
| 99 | memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_}, | ||
| 100 | blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, | ||
| 101 | use_present_thread{Settings::values.async_presentation.GetValue()}, | ||
| 102 | image_count{swapchain.GetImageCount()} { | ||
| 103 | |||
| 104 | auto& dld = device.GetLogical(); | ||
| 105 | cmdpool = dld.CreateCommandPool({ | ||
| 106 | .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, | ||
| 107 | .pNext = nullptr, | ||
| 108 | .flags = | ||
| 109 | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, | ||
| 110 | .queueFamilyIndex = device.GetGraphicsFamily(), | ||
| 111 | }); | ||
| 112 | auto cmdbuffers = cmdpool.Allocate(image_count); | ||
| 113 | |||
| 114 | frames.resize(image_count); | ||
| 115 | for (u32 i = 0; i < frames.size(); i++) { | ||
| 116 | Frame& frame = frames[i]; | ||
| 117 | frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()}; | ||
| 118 | frame.render_ready = dld.CreateSemaphore({ | ||
| 119 | .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, | ||
| 120 | .pNext = nullptr, | ||
| 121 | .flags = 0, | ||
| 122 | }); | ||
| 123 | frame.present_done = dld.CreateFence({ | ||
| 124 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, | ||
| 125 | .pNext = nullptr, | ||
| 126 | .flags = VK_FENCE_CREATE_SIGNALED_BIT, | ||
| 127 | }); | ||
| 128 | free_queue.push(&frame); | ||
| 129 | } | ||
| 130 | |||
| 131 | if (use_present_thread) { | ||
| 132 | present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); }); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | PresentManager::~PresentManager() = default; | ||
| 137 | |||
| 138 | Frame* PresentManager::GetRenderFrame() { | ||
| 139 | MICROPROFILE_SCOPE(Vulkan_WaitPresent); | ||
| 140 | |||
| 141 | // Wait for free presentation frames | ||
| 142 | std::unique_lock lock{free_mutex}; | ||
| 143 | free_cv.wait(lock, [this] { return !free_queue.empty(); }); | ||
| 144 | |||
| 145 | // Take the frame from the queue | ||
| 146 | Frame* frame = free_queue.front(); | ||
| 147 | free_queue.pop(); | ||
| 148 | |||
| 149 | // Wait for the presentation to be finished so all frame resources are free | ||
| 150 | frame->present_done.Wait(); | ||
| 151 | frame->present_done.Reset(); | ||
| 152 | |||
| 153 | return frame; | ||
| 154 | } | ||
| 155 | |||
| 156 | void PresentManager::Present(Frame* frame) { | ||
| 157 | if (!use_present_thread) { | ||
| 158 | scheduler.WaitWorker(); | ||
| 159 | CopyToSwapchain(frame); | ||
| 160 | free_queue.push(frame); | ||
| 161 | return; | ||
| 162 | } | ||
| 163 | |||
| 164 | scheduler.Record([this, frame](vk::CommandBuffer) { | ||
| 165 | std::unique_lock lock{queue_mutex}; | ||
| 166 | present_queue.push(frame); | ||
| 167 | frame_cv.notify_one(); | ||
| 168 | }); | ||
| 169 | } | ||
| 170 | |||
| 171 | void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, | ||
| 172 | VkFormat image_view_format, VkRenderPass rd) { | ||
| 173 | auto& dld = device.GetLogical(); | ||
| 174 | |||
| 175 | frame->width = width; | ||
| 176 | frame->height = height; | ||
| 177 | frame->is_srgb = is_srgb; | ||
| 178 | |||
| 179 | frame->image = dld.CreateImage({ | ||
| 180 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | ||
| 181 | .pNext = nullptr, | ||
| 182 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, | ||
| 183 | .imageType = VK_IMAGE_TYPE_2D, | ||
| 184 | .format = swapchain.GetImageFormat(), | ||
| 185 | .extent = | ||
| 186 | { | ||
| 187 | .width = width, | ||
| 188 | .height = height, | ||
| 189 | .depth = 1, | ||
| 190 | }, | ||
| 191 | .mipLevels = 1, | ||
| 192 | .arrayLayers = 1, | ||
| 193 | .samples = VK_SAMPLE_COUNT_1_BIT, | ||
| 194 | .tiling = VK_IMAGE_TILING_OPTIMAL, | ||
| 195 | .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, | ||
| 196 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 197 | .queueFamilyIndexCount = 0, | ||
| 198 | .pQueueFamilyIndices = nullptr, | ||
| 199 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 200 | }); | ||
| 201 | |||
| 202 | frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal); | ||
| 203 | |||
| 204 | frame->image_view = dld.CreateImageView({ | ||
| 205 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 206 | .pNext = nullptr, | ||
| 207 | .flags = 0, | ||
| 208 | .image = *frame->image, | ||
| 209 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 210 | .format = image_view_format, | ||
| 211 | .components = | ||
| 212 | { | ||
| 213 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 214 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 215 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 216 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 217 | }, | ||
| 218 | .subresourceRange = | ||
| 219 | { | ||
| 220 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 221 | .baseMipLevel = 0, | ||
| 222 | .levelCount = 1, | ||
| 223 | .baseArrayLayer = 0, | ||
| 224 | .layerCount = 1, | ||
| 225 | }, | ||
| 226 | }); | ||
| 227 | |||
| 228 | const VkImageView image_view{*frame->image_view}; | ||
| 229 | frame->framebuffer = dld.CreateFramebuffer({ | ||
| 230 | .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, | ||
| 231 | .pNext = nullptr, | ||
| 232 | .flags = 0, | ||
| 233 | .renderPass = rd, | ||
| 234 | .attachmentCount = 1, | ||
| 235 | .pAttachments = &image_view, | ||
| 236 | .width = width, | ||
| 237 | .height = height, | ||
| 238 | .layers = 1, | ||
| 239 | }); | ||
| 240 | } | ||
| 241 | |||
| 242 | void PresentManager::WaitPresent() { | ||
| 243 | if (!use_present_thread) { | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | |||
| 247 | // Wait for the present queue to be empty | ||
| 248 | { | ||
| 249 | std::unique_lock queue_lock{queue_mutex}; | ||
| 250 | frame_cv.wait(queue_lock, [this] { return present_queue.empty(); }); | ||
| 251 | } | ||
| 252 | |||
| 253 | // The above condition will be satisfied when the last frame is taken from the queue. | ||
| 254 | // To ensure that frame has been presented as well take hold of the swapchain | ||
| 255 | // mutex. | ||
| 256 | std::scoped_lock swapchain_lock{swapchain_mutex}; | ||
| 257 | } | ||
| 258 | |||
| 259 | void PresentManager::PresentThread(std::stop_token token) { | ||
| 260 | Common::SetCurrentThreadName("VulkanPresent"); | ||
| 261 | while (!token.stop_requested()) { | ||
| 262 | std::unique_lock lock{queue_mutex}; | ||
| 263 | |||
| 264 | // Wait for presentation frames | ||
| 265 | Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); }); | ||
| 266 | if (token.stop_requested()) { | ||
| 267 | return; | ||
| 268 | } | ||
| 269 | |||
| 270 | // Take the frame and notify anyone waiting | ||
| 271 | Frame* frame = present_queue.front(); | ||
| 272 | present_queue.pop(); | ||
| 273 | frame_cv.notify_one(); | ||
| 274 | |||
| 275 | // By exchanging the lock ownership we take the swapchain lock | ||
| 276 | // before the queue lock goes out of scope. This way the swapchain | ||
| 277 | // lock in WaitPresent is guaranteed to occur after here. | ||
| 278 | std::exchange(lock, std::unique_lock{swapchain_mutex}); | ||
| 279 | |||
| 280 | CopyToSwapchain(frame); | ||
| 281 | |||
| 282 | // Free the frame for reuse | ||
| 283 | std::scoped_lock fl{free_mutex}; | ||
| 284 | free_queue.push(frame); | ||
| 285 | free_cv.notify_one(); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | void PresentManager::CopyToSwapchain(Frame* frame) { | ||
| 290 | MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); | ||
| 291 | |||
| 292 | const auto recreate_swapchain = [&] { | ||
| 293 | swapchain.Create(frame->width, frame->height, frame->is_srgb); | ||
| 294 | image_count = swapchain.GetImageCount(); | ||
| 295 | }; | ||
| 296 | |||
| 297 | // If the size or colorspace of the incoming frames has changed, recreate the swapchain | ||
| 298 | // to account for that. | ||
| 299 | const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb); | ||
| 300 | const bool size_changed = | ||
| 301 | swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; | ||
| 302 | if (srgb_changed || size_changed) { | ||
| 303 | recreate_swapchain(); | ||
| 304 | } | ||
| 305 | |||
| 306 | while (swapchain.AcquireNextImage()) { | ||
| 307 | recreate_swapchain(); | ||
| 308 | } | ||
| 309 | |||
| 310 | const vk::CommandBuffer cmdbuf{frame->cmdbuf}; | ||
| 311 | cmdbuf.Begin({ | ||
| 312 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 313 | .pNext = nullptr, | ||
| 314 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 315 | .pInheritanceInfo = nullptr, | ||
| 316 | }); | ||
| 317 | |||
| 318 | const VkImage image{swapchain.CurrentImage()}; | ||
| 319 | const VkExtent2D extent = swapchain.GetExtent(); | ||
| 320 | const std::array pre_barriers{ | ||
| 321 | VkImageMemoryBarrier{ | ||
| 322 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 323 | .pNext = nullptr, | ||
| 324 | .srcAccessMask = 0, | ||
| 325 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 326 | .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, | ||
| 327 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 328 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 329 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 330 | .image = image, | ||
| 331 | .subresourceRange{ | ||
| 332 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 333 | .baseMipLevel = 0, | ||
| 334 | .levelCount = 1, | ||
| 335 | .baseArrayLayer = 0, | ||
| 336 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 337 | }, | ||
| 338 | }, | ||
| 339 | VkImageMemoryBarrier{ | ||
| 340 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 341 | .pNext = nullptr, | ||
| 342 | .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, | ||
| 343 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 344 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 345 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 346 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 347 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 348 | .image = *frame->image, | ||
| 349 | .subresourceRange{ | ||
| 350 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 351 | .baseMipLevel = 0, | ||
| 352 | .levelCount = 1, | ||
| 353 | .baseArrayLayer = 0, | ||
| 354 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 355 | }, | ||
| 356 | }, | ||
| 357 | }; | ||
| 358 | const std::array post_barriers{ | ||
| 359 | VkImageMemoryBarrier{ | ||
| 360 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 361 | .pNext = nullptr, | ||
| 362 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 363 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, | ||
| 364 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 365 | .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, | ||
| 366 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 367 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 368 | .image = image, | ||
| 369 | .subresourceRange{ | ||
| 370 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 371 | .baseMipLevel = 0, | ||
| 372 | .levelCount = 1, | ||
| 373 | .baseArrayLayer = 0, | ||
| 374 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 375 | }, | ||
| 376 | }, | ||
| 377 | VkImageMemoryBarrier{ | ||
| 378 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 379 | .pNext = nullptr, | ||
| 380 | .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 381 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 382 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 383 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 384 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 385 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 386 | .image = *frame->image, | ||
| 387 | .subresourceRange{ | ||
| 388 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 389 | .baseMipLevel = 0, | ||
| 390 | .levelCount = 1, | ||
| 391 | .baseArrayLayer = 0, | ||
| 392 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 393 | }, | ||
| 394 | }, | ||
| 395 | }; | ||
| 396 | |||
| 397 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {}, | ||
| 398 | {}, {}, pre_barriers); | ||
| 399 | |||
| 400 | if (blit_supported) { | ||
| 401 | cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, | ||
| 402 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 403 | MakeImageBlit(frame->width, frame->height, extent.width, extent.height), | ||
| 404 | VK_FILTER_LINEAR); | ||
| 405 | } else { | ||
| 406 | cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, | ||
| 407 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||
| 408 | MakeImageCopy(frame->width, frame->height, extent.width, extent.height)); | ||
| 409 | } | ||
| 410 | |||
| 411 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {}, | ||
| 412 | {}, {}, post_barriers); | ||
| 413 | |||
| 414 | cmdbuf.End(); | ||
| 415 | |||
| 416 | const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore(); | ||
| 417 | const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore(); | ||
| 418 | const std::array wait_semaphores = {present_semaphore, *frame->render_ready}; | ||
| 419 | |||
| 420 | static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{ | ||
| 421 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | ||
| 422 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 423 | }; | ||
| 424 | |||
| 425 | const VkSubmitInfo submit_info{ | ||
| 426 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | ||
| 427 | .pNext = nullptr, | ||
| 428 | .waitSemaphoreCount = 2U, | ||
| 429 | .pWaitSemaphores = wait_semaphores.data(), | ||
| 430 | .pWaitDstStageMask = wait_stage_masks.data(), | ||
| 431 | .commandBufferCount = 1, | ||
| 432 | .pCommandBuffers = cmdbuf.address(), | ||
| 433 | .signalSemaphoreCount = 1U, | ||
| 434 | .pSignalSemaphores = &render_semaphore, | ||
| 435 | }; | ||
| 436 | |||
| 437 | // Submit the image copy/blit to the swapchain | ||
| 438 | { | ||
| 439 | std::scoped_lock lock{scheduler.submit_mutex}; | ||
| 440 | switch (const VkResult result = | ||
| 441 | device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) { | ||
| 442 | case VK_SUCCESS: | ||
| 443 | break; | ||
| 444 | case VK_ERROR_DEVICE_LOST: | ||
| 445 | device.ReportLoss(); | ||
| 446 | [[fallthrough]]; | ||
| 447 | default: | ||
| 448 | vk::Check(result); | ||
| 449 | break; | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | // Present | ||
| 454 | swapchain.Present(render_semaphore); | ||
| 455 | } | ||
| 456 | |||
| 457 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h new file mode 100644 index 000000000..420a775e2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_present_manager.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <condition_variable> | ||
| 7 | #include <mutex> | ||
| 8 | #include <queue> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/polyfill_thread.h" | ||
| 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 14 | |||
| 15 | namespace Core::Frontend { | ||
| 16 | class EmuWindow; | ||
| 17 | } // namespace Core::Frontend | ||
| 18 | |||
| 19 | namespace Vulkan { | ||
| 20 | |||
| 21 | class Device; | ||
| 22 | class Scheduler; | ||
| 23 | class Swapchain; | ||
| 24 | |||
| 25 | struct Frame { | ||
| 26 | u32 width; | ||
| 27 | u32 height; | ||
| 28 | bool is_srgb; | ||
| 29 | vk::Image image; | ||
| 30 | vk::ImageView image_view; | ||
| 31 | vk::Framebuffer framebuffer; | ||
| 32 | MemoryCommit image_commit; | ||
| 33 | vk::CommandBuffer cmdbuf; | ||
| 34 | vk::Semaphore render_ready; | ||
| 35 | vk::Fence present_done; | ||
| 36 | }; | ||
| 37 | |||
| 38 | class PresentManager { | ||
| 39 | public: | ||
| 40 | PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device, | ||
| 41 | MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain); | ||
| 42 | ~PresentManager(); | ||
| 43 | |||
| 44 | /// Returns the last used presentation frame | ||
| 45 | Frame* GetRenderFrame(); | ||
| 46 | |||
| 47 | /// Pushes a frame for presentation | ||
| 48 | void Present(Frame* frame); | ||
| 49 | |||
| 50 | /// Recreates the present frame to match the provided parameters | ||
| 51 | void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb, | ||
| 52 | VkFormat image_view_format, VkRenderPass rd); | ||
| 53 | |||
| 54 | /// Waits for the present thread to finish presenting all queued frames. | ||
| 55 | void WaitPresent(); | ||
| 56 | |||
| 57 | private: | ||
| 58 | void PresentThread(std::stop_token token); | ||
| 59 | |||
| 60 | void CopyToSwapchain(Frame* frame); | ||
| 61 | |||
| 62 | private: | ||
| 63 | Core::Frontend::EmuWindow& render_window; | ||
| 64 | const Device& device; | ||
| 65 | MemoryAllocator& memory_allocator; | ||
| 66 | Scheduler& scheduler; | ||
| 67 | Swapchain& swapchain; | ||
| 68 | vk::CommandPool cmdpool; | ||
| 69 | std::vector<Frame> frames; | ||
| 70 | std::queue<Frame*> present_queue; | ||
| 71 | std::queue<Frame*> free_queue; | ||
| 72 | std::condition_variable_any frame_cv; | ||
| 73 | std::condition_variable free_cv; | ||
| 74 | std::mutex swapchain_mutex; | ||
| 75 | std::mutex queue_mutex; | ||
| 76 | std::mutex free_mutex; | ||
| 77 | std::jthread present_thread; | ||
| 78 | bool blit_supported; | ||
| 79 | bool use_present_thread; | ||
| 80 | std::size_t image_count; | ||
| 81 | }; | ||
| 82 | |||
| 83 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 057e16967..80455ec08 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_) | |||
| 46 | 46 | ||
| 47 | Scheduler::~Scheduler() = default; | 47 | Scheduler::~Scheduler() = default; |
| 48 | 48 | ||
| 49 | void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 49 | u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| 50 | // When flushing, we only send data to the worker thread; no waiting is necessary. | 50 | // When flushing, we only send data to the worker thread; no waiting is necessary. |
| 51 | SubmitExecution(signal_semaphore, wait_semaphore); | 51 | const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore); |
| 52 | AllocateNewContext(); | 52 | AllocateNewContext(); |
| 53 | return signal_value; | ||
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 56 | void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| @@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() { | |||
| 205 | }); | 206 | }); |
| 206 | } | 207 | } |
| 207 | 208 | ||
| 208 | void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { | 209 | u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { |
| 209 | EndPendingOperations(); | 210 | EndPendingOperations(); |
| 210 | InvalidateState(); | 211 | InvalidateState(); |
| 211 | 212 | ||
| @@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s | |||
| 217 | on_submit(); | 218 | on_submit(); |
| 218 | } | 219 | } |
| 219 | 220 | ||
| 221 | std::scoped_lock lock{submit_mutex}; | ||
| 220 | switch (const VkResult result = master_semaphore->SubmitQueue( | 222 | switch (const VkResult result = master_semaphore->SubmitQueue( |
| 221 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { | 223 | cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { |
| 222 | case VK_SUCCESS: | 224 | case VK_SUCCESS: |
| @@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s | |||
| 231 | }); | 233 | }); |
| 232 | chunk->MarkSubmit(); | 234 | chunk->MarkSubmit(); |
| 233 | DispatchWork(); | 235 | DispatchWork(); |
| 236 | return signal_value; | ||
| 234 | } | 237 | } |
| 235 | 238 | ||
| 236 | void Scheduler::AllocateNewContext() { | 239 | void Scheduler::AllocateNewContext() { |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 8d75ce987..475c682eb 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -34,7 +34,7 @@ public: | |||
| 34 | ~Scheduler(); | 34 | ~Scheduler(); |
| 35 | 35 | ||
| 36 | /// Sends the current execution context to the GPU. | 36 | /// Sends the current execution context to the GPU. |
| 37 | void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); | 37 | u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); |
| 38 | 38 | ||
| 39 | /// Sends the current execution context to the GPU and waits for it to complete. | 39 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 40 | void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); | 40 | void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); |
| @@ -106,6 +106,8 @@ public: | |||
| 106 | return *master_semaphore; | 106 | return *master_semaphore; |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | std::mutex submit_mutex; | ||
| 110 | |||
| 109 | private: | 111 | private: |
| 110 | class Command { | 112 | class Command { |
| 111 | public: | 113 | public: |
| @@ -201,7 +203,7 @@ private: | |||
| 201 | 203 | ||
| 202 | void AllocateWorkerCommandBuffer(); | 204 | void AllocateWorkerCommandBuffer(); |
| 203 | 205 | ||
| 204 | void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); | 206 | u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); |
| 205 | 207 | ||
| 206 | void AllocateNewContext(); | 208 | void AllocateNewContext(); |
| 207 | 209 | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index b1465e35c..23bbea7f1 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -99,18 +99,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) { | |||
| 99 | return; | 99 | return; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | device.GetLogical().WaitIdle(); | ||
| 103 | Destroy(); | 102 | Destroy(); |
| 104 | 103 | ||
| 105 | CreateSwapchain(capabilities, srgb); | 104 | CreateSwapchain(capabilities, srgb); |
| 106 | CreateSemaphores(); | 105 | CreateSemaphores(); |
| 107 | CreateImageViews(); | ||
| 108 | 106 | ||
| 109 | resource_ticks.clear(); | 107 | resource_ticks.clear(); |
| 110 | resource_ticks.resize(image_count); | 108 | resource_ticks.resize(image_count); |
| 111 | } | 109 | } |
| 112 | 110 | ||
| 113 | void Swapchain::AcquireNextImage() { | 111 | bool Swapchain::AcquireNextImage() { |
| 114 | const VkResult result = device.GetLogical().AcquireNextImageKHR( | 112 | const VkResult result = device.GetLogical().AcquireNextImageKHR( |
| 115 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], | 113 | *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], |
| 116 | VK_NULL_HANDLE, &image_index); | 114 | VK_NULL_HANDLE, &image_index); |
| @@ -127,8 +125,11 @@ void Swapchain::AcquireNextImage() { | |||
| 127 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); | 125 | LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); |
| 128 | break; | 126 | break; |
| 129 | } | 127 | } |
| 128 | |||
| 130 | scheduler.Wait(resource_ticks[image_index]); | 129 | scheduler.Wait(resource_ticks[image_index]); |
| 131 | resource_ticks[image_index] = scheduler.CurrentTick(); | 130 | resource_ticks[image_index] = scheduler.CurrentTick(); |
| 131 | |||
| 132 | return is_suboptimal || is_outdated; | ||
| 132 | } | 133 | } |
| 133 | 134 | ||
| 134 | void Swapchain::Present(VkSemaphore render_semaphore) { | 135 | void Swapchain::Present(VkSemaphore render_semaphore) { |
| @@ -143,6 +144,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) { | |||
| 143 | .pImageIndices = &image_index, | 144 | .pImageIndices = &image_index, |
| 144 | .pResults = nullptr, | 145 | .pResults = nullptr, |
| 145 | }; | 146 | }; |
| 147 | std::scoped_lock lock{scheduler.submit_mutex}; | ||
| 146 | switch (const VkResult result = present_queue.Present(present_info)) { | 148 | switch (const VkResult result = present_queue.Present(present_info)) { |
| 147 | case VK_SUCCESS: | 149 | case VK_SUCCESS: |
| 148 | break; | 150 | break; |
| @@ -168,7 +170,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo | |||
| 168 | const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; | 170 | const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; |
| 169 | 171 | ||
| 170 | const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)}; | 172 | const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)}; |
| 171 | const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; | 173 | surface_format = ChooseSwapSurfaceFormat(formats); |
| 172 | present_mode = ChooseSwapPresentMode(present_modes); | 174 | present_mode = ChooseSwapPresentMode(present_modes); |
| 173 | 175 | ||
| 174 | u32 requested_image_count{capabilities.minImageCount + 1}; | 176 | u32 requested_image_count{capabilities.minImageCount + 1}; |
| @@ -193,7 +195,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo | |||
| 193 | .imageColorSpace = surface_format.colorSpace, | 195 | .imageColorSpace = surface_format.colorSpace, |
| 194 | .imageExtent = {}, | 196 | .imageExtent = {}, |
| 195 | .imageArrayLayers = 1, | 197 | .imageArrayLayers = 1, |
| 196 | .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, | 198 | .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, |
| 197 | .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, | 199 | .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 198 | .queueFamilyIndexCount = 0, | 200 | .queueFamilyIndexCount = 0, |
| 199 | .pQueueFamilyIndices = nullptr, | 201 | .pQueueFamilyIndices = nullptr, |
| @@ -241,45 +243,14 @@ void Swapchain::CreateSemaphores() { | |||
| 241 | present_semaphores.resize(image_count); | 243 | present_semaphores.resize(image_count); |
| 242 | std::ranges::generate(present_semaphores, | 244 | std::ranges::generate(present_semaphores, |
| 243 | [this] { return device.GetLogical().CreateSemaphore(); }); | 245 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 244 | } | 246 | render_semaphores.resize(image_count); |
| 245 | 247 | std::ranges::generate(render_semaphores, | |
| 246 | void Swapchain::CreateImageViews() { | 248 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 247 | VkImageViewCreateInfo ci{ | ||
| 248 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | ||
| 249 | .pNext = nullptr, | ||
| 250 | .flags = 0, | ||
| 251 | .image = {}, | ||
| 252 | .viewType = VK_IMAGE_VIEW_TYPE_2D, | ||
| 253 | .format = image_view_format, | ||
| 254 | .components = | ||
| 255 | { | ||
| 256 | .r = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 257 | .g = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 258 | .b = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 259 | .a = VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 260 | }, | ||
| 261 | .subresourceRange = | ||
| 262 | { | ||
| 263 | .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, | ||
| 264 | .baseMipLevel = 0, | ||
| 265 | .levelCount = 1, | ||
| 266 | .baseArrayLayer = 0, | ||
| 267 | .layerCount = 1, | ||
| 268 | }, | ||
| 269 | }; | ||
| 270 | |||
| 271 | image_views.resize(image_count); | ||
| 272 | for (std::size_t i = 0; i < image_count; i++) { | ||
| 273 | ci.image = images[i]; | ||
| 274 | image_views[i] = device.GetLogical().CreateImageView(ci); | ||
| 275 | } | ||
| 276 | } | 249 | } |
| 277 | 250 | ||
| 278 | void Swapchain::Destroy() { | 251 | void Swapchain::Destroy() { |
| 279 | frame_index = 0; | 252 | frame_index = 0; |
| 280 | present_semaphores.clear(); | 253 | present_semaphores.clear(); |
| 281 | framebuffers.clear(); | ||
| 282 | image_views.clear(); | ||
| 283 | swapchain.reset(); | 254 | swapchain.reset(); |
| 284 | } | 255 | } |
| 285 | 256 | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index caf1ff32b..419742586 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -27,7 +27,7 @@ public: | |||
| 27 | void Create(u32 width, u32 height, bool srgb); | 27 | void Create(u32 width, u32 height, bool srgb); |
| 28 | 28 | ||
| 29 | /// Acquires the next image in the swapchain, waits as needed. | 29 | /// Acquires the next image in the swapchain, waits as needed. |
| 30 | void AcquireNextImage(); | 30 | bool AcquireNextImage(); |
| 31 | 31 | ||
| 32 | /// Presents the rendered image to the swapchain. | 32 | /// Presents the rendered image to the swapchain. |
| 33 | void Present(VkSemaphore render_semaphore); | 33 | void Present(VkSemaphore render_semaphore); |
| @@ -52,6 +52,11 @@ public: | |||
| 52 | return is_suboptimal; | 52 | return is_suboptimal; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | /// Returns true when the swapchain format is in the srgb color space | ||
| 56 | bool IsSrgb() const { | ||
| 57 | return current_srgb; | ||
| 58 | } | ||
| 59 | |||
| 55 | VkExtent2D GetSize() const { | 60 | VkExtent2D GetSize() const { |
| 56 | return extent; | 61 | return extent; |
| 57 | } | 62 | } |
| @@ -64,22 +69,34 @@ public: | |||
| 64 | return image_index; | 69 | return image_index; |
| 65 | } | 70 | } |
| 66 | 71 | ||
| 72 | std::size_t GetFrameIndex() const { | ||
| 73 | return frame_index; | ||
| 74 | } | ||
| 75 | |||
| 67 | VkImage GetImageIndex(std::size_t index) const { | 76 | VkImage GetImageIndex(std::size_t index) const { |
| 68 | return images[index]; | 77 | return images[index]; |
| 69 | } | 78 | } |
| 70 | 79 | ||
| 71 | VkImageView GetImageViewIndex(std::size_t index) const { | 80 | VkImage CurrentImage() const { |
| 72 | return *image_views[index]; | 81 | return images[image_index]; |
| 73 | } | 82 | } |
| 74 | 83 | ||
| 75 | VkFormat GetImageViewFormat() const { | 84 | VkFormat GetImageViewFormat() const { |
| 76 | return image_view_format; | 85 | return image_view_format; |
| 77 | } | 86 | } |
| 78 | 87 | ||
| 88 | VkFormat GetImageFormat() const { | ||
| 89 | return surface_format.format; | ||
| 90 | } | ||
| 91 | |||
| 79 | VkSemaphore CurrentPresentSemaphore() const { | 92 | VkSemaphore CurrentPresentSemaphore() const { |
| 80 | return *present_semaphores[frame_index]; | 93 | return *present_semaphores[frame_index]; |
| 81 | } | 94 | } |
| 82 | 95 | ||
| 96 | VkSemaphore CurrentRenderSemaphore() const { | ||
| 97 | return *render_semaphores[frame_index]; | ||
| 98 | } | ||
| 99 | |||
| 83 | u32 GetWidth() const { | 100 | u32 GetWidth() const { |
| 84 | return width; | 101 | return width; |
| 85 | } | 102 | } |
| @@ -88,6 +105,10 @@ public: | |||
| 88 | return height; | 105 | return height; |
| 89 | } | 106 | } |
| 90 | 107 | ||
| 108 | VkExtent2D GetExtent() const { | ||
| 109 | return extent; | ||
| 110 | } | ||
| 111 | |||
| 91 | private: | 112 | private: |
| 92 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); | 113 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); |
| 93 | void CreateSemaphores(); | 114 | void CreateSemaphores(); |
| @@ -107,10 +128,9 @@ private: | |||
| 107 | 128 | ||
| 108 | std::size_t image_count{}; | 129 | std::size_t image_count{}; |
| 109 | std::vector<VkImage> images; | 130 | std::vector<VkImage> images; |
| 110 | std::vector<vk::ImageView> image_views; | ||
| 111 | std::vector<vk::Framebuffer> framebuffers; | ||
| 112 | std::vector<u64> resource_ticks; | 131 | std::vector<u64> resource_ticks; |
| 113 | std::vector<vk::Semaphore> present_semaphores; | 132 | std::vector<vk::Semaphore> present_semaphores; |
| 133 | std::vector<vk::Semaphore> render_semaphores; | ||
| 114 | 134 | ||
| 115 | u32 width; | 135 | u32 width; |
| 116 | u32 height; | 136 | u32 height; |
| @@ -121,6 +141,7 @@ private: | |||
| 121 | VkFormat image_view_format{}; | 141 | VkFormat image_view_format{}; |
| 122 | VkExtent2D extent{}; | 142 | VkExtent2D extent{}; |
| 123 | VkPresentModeKHR present_mode{}; | 143 | VkPresentModeKHR present_mode{}; |
| 144 | VkSurfaceFormatKHR surface_format{}; | ||
| 124 | 145 | ||
| 125 | bool current_srgb{}; | 146 | bool current_srgb{}; |
| 126 | bool current_fps_unlocked{}; | 147 | bool current_fps_unlocked{}; |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 009dab0b6..0630ebda5 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -14,13 +14,18 @@ namespace Vulkan { | |||
| 14 | 14 | ||
| 15 | UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) | 15 | UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) |
| 16 | : device{device_}, scheduler{scheduler_} { | 16 | : device{device_}, scheduler{scheduler_} { |
| 17 | payload_start = payload.data(); | ||
| 17 | payload_cursor = payload.data(); | 18 | payload_cursor = payload.data(); |
| 18 | } | 19 | } |
| 19 | 20 | ||
| 20 | UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; | 21 | UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; |
| 21 | 22 | ||
| 22 | void UpdateDescriptorQueue::TickFrame() { | 23 | void UpdateDescriptorQueue::TickFrame() { |
| 23 | payload_cursor = payload.data(); | 24 | if (++frame_index >= FRAMES_IN_FLIGHT) { |
| 25 | frame_index = 0; | ||
| 26 | } | ||
| 27 | payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE; | ||
| 28 | payload_cursor = payload_start; | ||
| 24 | } | 29 | } |
| 25 | 30 | ||
| 26 | void UpdateDescriptorQueue::Acquire() { | 31 | void UpdateDescriptorQueue::Acquire() { |
| @@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() { | |||
| 28 | // This is the maximum number of entries a single draw call might use. | 33 | // This is the maximum number of entries a single draw call might use. |
| 29 | static constexpr size_t MIN_ENTRIES = 0x400; | 34 | static constexpr size_t MIN_ENTRIES = 0x400; |
| 30 | 35 | ||
| 31 | if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { | 36 | if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) { |
| 32 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); | 37 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); |
| 33 | scheduler.WaitWorker(); | 38 | scheduler.WaitWorker(); |
| 34 | payload_cursor = payload.data(); | 39 | payload_cursor = payload_start; |
| 35 | } | 40 | } |
| 36 | upload_start = payload_cursor; | 41 | upload_start = payload_cursor; |
| 37 | } | 42 | } |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 625bcc809..1c1a7020b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -29,6 +29,12 @@ struct DescriptorUpdateEntry { | |||
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | class UpdateDescriptorQueue final { | 31 | class UpdateDescriptorQueue final { |
| 32 | // This should be plenty for the vast majority of cases. Most desktop platforms only | ||
| 33 | // provide up to 3 swapchain images. | ||
| 34 | static constexpr size_t FRAMES_IN_FLIGHT = 5; | ||
| 35 | static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000; | ||
| 36 | static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT; | ||
| 37 | |||
| 32 | public: | 38 | public: |
| 33 | explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); | 39 | explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); |
| 34 | ~UpdateDescriptorQueue(); | 40 | ~UpdateDescriptorQueue(); |
| @@ -73,9 +79,11 @@ private: | |||
| 73 | const Device& device; | 79 | const Device& device; |
| 74 | Scheduler& scheduler; | 80 | Scheduler& scheduler; |
| 75 | 81 | ||
| 82 | size_t frame_index{0}; | ||
| 76 | DescriptorUpdateEntry* payload_cursor = nullptr; | 83 | DescriptorUpdateEntry* payload_cursor = nullptr; |
| 84 | DescriptorUpdateEntry* payload_start = nullptr; | ||
| 77 | const DescriptorUpdateEntry* upload_start = nullptr; | 85 | const DescriptorUpdateEntry* upload_start = nullptr; |
| 78 | std::array<DescriptorUpdateEntry, 0x10000> payload; | 86 | std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload; |
| 79 | }; | 87 | }; |
| 80 | 88 | ||
| 81 | } // namespace Vulkan | 89 | } // namespace Vulkan |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e601f8446..f335009d0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -888,7 +888,7 @@ void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* i | |||
| 888 | buffer, | 888 | buffer, |
| 889 | download_map.buffer, | 889 | download_map.buffer, |
| 890 | }; | 890 | }; |
| 891 | std::array buffer_offsets{ | 891 | std::array<u64, 2> buffer_offsets{ |
| 892 | buffer_offset, | 892 | buffer_offset, |
| 893 | download_map.offset, | 893 | download_map.offset, |
| 894 | }; | 894 | }; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 6f288b3f8..6ffca2af2 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -617,7 +617,9 @@ bool Device::ShouldBoostClocks() const { | |||
| 617 | 617 | ||
| 618 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; | 618 | const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; |
| 619 | 619 | ||
| 620 | return validated_driver && !is_steam_deck; | 620 | const bool is_debugging = this->HasDebuggingToolAttached(); |
| 621 | |||
| 622 | return validated_driver && !is_steam_deck && !is_debugging; | ||
| 621 | } | 623 | } |
| 622 | 624 | ||
| 623 | bool Device::GetSuitability(bool requires_swapchain) { | 625 | bool Device::GetSuitability(bool requires_swapchain) { |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index bb731276e..be33e4d79 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -497,7 +497,7 @@ void Config::ReadCoreValues() { | |||
| 497 | qt_config->beginGroup(QStringLiteral("Core")); | 497 | qt_config->beginGroup(QStringLiteral("Core")); |
| 498 | 498 | ||
| 499 | ReadGlobalSetting(Settings::values.use_multi_core); | 499 | ReadGlobalSetting(Settings::values.use_multi_core); |
| 500 | ReadGlobalSetting(Settings::values.use_extended_memory_layout); | 500 | ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout); |
| 501 | 501 | ||
| 502 | qt_config->endGroup(); | 502 | qt_config->endGroup(); |
| 503 | } | 503 | } |
| @@ -692,6 +692,7 @@ void Config::ReadRendererValues() { | |||
| 692 | qt_config->beginGroup(QStringLiteral("Renderer")); | 692 | qt_config->beginGroup(QStringLiteral("Renderer")); |
| 693 | 693 | ||
| 694 | ReadGlobalSetting(Settings::values.renderer_backend); | 694 | ReadGlobalSetting(Settings::values.renderer_backend); |
| 695 | ReadGlobalSetting(Settings::values.async_presentation); | ||
| 695 | ReadGlobalSetting(Settings::values.renderer_force_max_clock); | 696 | ReadGlobalSetting(Settings::values.renderer_force_max_clock); |
| 696 | ReadGlobalSetting(Settings::values.vulkan_device); | 697 | ReadGlobalSetting(Settings::values.vulkan_device); |
| 697 | ReadGlobalSetting(Settings::values.fullscreen_mode); | 698 | ReadGlobalSetting(Settings::values.fullscreen_mode); |
| @@ -1161,7 +1162,7 @@ void Config::SaveCoreValues() { | |||
| 1161 | qt_config->beginGroup(QStringLiteral("Core")); | 1162 | qt_config->beginGroup(QStringLiteral("Core")); |
| 1162 | 1163 | ||
| 1163 | WriteGlobalSetting(Settings::values.use_multi_core); | 1164 | WriteGlobalSetting(Settings::values.use_multi_core); |
| 1164 | WriteGlobalSetting(Settings::values.use_extended_memory_layout); | 1165 | WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout); |
| 1165 | 1166 | ||
| 1166 | qt_config->endGroup(); | 1167 | qt_config->endGroup(); |
| 1167 | } | 1168 | } |
| @@ -1313,6 +1314,7 @@ void Config::SaveRendererValues() { | |||
| 1313 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), | 1314 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), |
| 1314 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), | 1315 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), |
| 1315 | Settings::values.renderer_backend.UsingGlobal()); | 1316 | Settings::values.renderer_backend.UsingGlobal()); |
| 1317 | WriteGlobalSetting(Settings::values.async_presentation); | ||
| 1316 | WriteGlobalSetting(Settings::values.renderer_force_max_clock); | 1318 | WriteGlobalSetting(Settings::values.renderer_force_max_clock); |
| 1317 | WriteGlobalSetting(Settings::values.vulkan_device); | 1319 | WriteGlobalSetting(Settings::values.vulkan_device); |
| 1318 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), | 1320 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), |
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp index 207bcdc4d..26258d744 100644 --- a/src/yuzu/configuration/configure_general.cpp +++ b/src/yuzu/configuration/configure_general.cpp | |||
| @@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() { | |||
| 35 | 35 | ||
| 36 | ui->use_multi_core->setEnabled(runtime_lock); | 36 | ui->use_multi_core->setEnabled(runtime_lock); |
| 37 | ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); | 37 | ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); |
| 38 | ui->use_extended_memory_layout->setEnabled(runtime_lock); | ||
| 39 | ui->use_extended_memory_layout->setChecked( | ||
| 40 | Settings::values.use_extended_memory_layout.GetValue()); | ||
| 41 | 38 | ||
| 42 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); | 39 | ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); |
| 43 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); | 40 | ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); |
| @@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() { | |||
| 79 | void ConfigureGeneral::ApplyConfiguration() { | 76 | void ConfigureGeneral::ApplyConfiguration() { |
| 80 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, | 77 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, |
| 81 | use_multi_core); | 78 | use_multi_core); |
| 82 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout, | ||
| 83 | ui->use_extended_memory_layout, | ||
| 84 | use_extended_memory_layout); | ||
| 85 | 79 | ||
| 86 | if (Settings::IsConfiguringGlobal()) { | 80 | if (Settings::IsConfiguringGlobal()) { |
| 87 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); | 81 | UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); |
| @@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() { | |||
| 141 | Settings::values.use_speed_limit, use_speed_limit); | 135 | Settings::values.use_speed_limit, use_speed_limit); |
| 142 | ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, | 136 | ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, |
| 143 | use_multi_core); | 137 | use_multi_core); |
| 144 | ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout, | ||
| 145 | Settings::values.use_extended_memory_layout, | ||
| 146 | use_extended_memory_layout); | ||
| 147 | 138 | ||
| 148 | connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { | 139 | connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { |
| 149 | ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && | 140 | ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && |
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h index a090c1a3f..7ff63f425 100644 --- a/src/yuzu/configuration/configure_general.h +++ b/src/yuzu/configuration/configure_general.h | |||
| @@ -47,7 +47,6 @@ private: | |||
| 47 | 47 | ||
| 48 | ConfigurationShared::CheckState use_speed_limit; | 48 | ConfigurationShared::CheckState use_speed_limit; |
| 49 | ConfigurationShared::CheckState use_multi_core; | 49 | ConfigurationShared::CheckState use_multi_core; |
| 50 | ConfigurationShared::CheckState use_extended_memory_layout; | ||
| 51 | 50 | ||
| 52 | const Core::System& system; | 51 | const Core::System& system; |
| 53 | }; | 52 | }; |
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui index add110bb0..986a1625b 100644 --- a/src/yuzu/configuration/configure_general.ui +++ b/src/yuzu/configuration/configure_general.ui | |||
| @@ -62,13 +62,6 @@ | |||
| 62 | </widget> | 62 | </widget> |
| 63 | </item> | 63 | </item> |
| 64 | <item> | 64 | <item> |
| 65 | <widget class="QCheckBox" name="use_extended_memory_layout"> | ||
| 66 | <property name="text"> | ||
| 67 | <string>Extended memory layout (8GB DRAM)</string> | ||
| 68 | </property> | ||
| 69 | </widget> | ||
| 70 | </item> | ||
| 71 | <item> | ||
| 72 | <widget class="QCheckBox" name="toggle_check_exit"> | 65 | <widget class="QCheckBox" name="toggle_check_exit"> |
| 73 | <property name="text"> | 66 | <property name="text"> |
| 74 | <string>Confirm exit while emulation is running</string> | 67 | <string>Confirm exit while emulation is running</string> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 59fb1b334..7f7bf0e4d 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -22,11 +22,13 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | |||
| 22 | void ConfigureGraphicsAdvanced::SetConfiguration() { | 22 | void ConfigureGraphicsAdvanced::SetConfiguration() { |
| 23 | const bool runtime_lock = !system.IsPoweredOn(); | 23 | const bool runtime_lock = !system.IsPoweredOn(); |
| 24 | ui->use_vsync->setEnabled(runtime_lock); | 24 | ui->use_vsync->setEnabled(runtime_lock); |
| 25 | ui->async_present->setEnabled(runtime_lock); | ||
| 25 | ui->renderer_force_max_clock->setEnabled(runtime_lock); | 26 | ui->renderer_force_max_clock->setEnabled(runtime_lock); |
| 26 | ui->async_astc->setEnabled(runtime_lock); | 27 | ui->async_astc->setEnabled(runtime_lock); |
| 27 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); | 28 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); |
| 28 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | 29 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); |
| 29 | 30 | ||
| 31 | ui->async_present->setChecked(Settings::values.async_presentation.GetValue()); | ||
| 30 | ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); | 32 | ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); |
| 31 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); | 33 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); |
| 32 | ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); | 34 | ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); |
| @@ -54,6 +56,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 54 | 56 | ||
| 55 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | 57 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { |
| 56 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); | 58 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); |
| 59 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation, | ||
| 60 | ui->async_present, async_present); | ||
| 57 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, | 61 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, |
| 58 | ui->renderer_force_max_clock, | 62 | ui->renderer_force_max_clock, |
| 59 | renderer_force_max_clock); | 63 | renderer_force_max_clock); |
| @@ -90,6 +94,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 90 | // Disable if not global (only happens during game) | 94 | // Disable if not global (only happens during game) |
| 91 | if (Settings::IsConfiguringGlobal()) { | 95 | if (Settings::IsConfiguringGlobal()) { |
| 92 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); | 96 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); |
| 97 | ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal()); | ||
| 93 | ui->renderer_force_max_clock->setEnabled( | 98 | ui->renderer_force_max_clock->setEnabled( |
| 94 | Settings::values.renderer_force_max_clock.UsingGlobal()); | 99 | Settings::values.renderer_force_max_clock.UsingGlobal()); |
| 95 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); | 100 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); |
| @@ -107,6 +112,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 107 | return; | 112 | return; |
| 108 | } | 113 | } |
| 109 | 114 | ||
| 115 | ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation, | ||
| 116 | async_present); | ||
| 110 | ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, | 117 | ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, |
| 111 | Settings::values.renderer_force_max_clock, | 118 | Settings::values.renderer_force_max_clock, |
| 112 | renderer_force_max_clock); | 119 | renderer_force_max_clock); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index bf1b04749..5394ed40a 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -36,6 +36,7 @@ private: | |||
| 36 | 36 | ||
| 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; | 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; |
| 38 | 38 | ||
| 39 | ConfigurationShared::CheckState async_present; | ||
| 39 | ConfigurationShared::CheckState renderer_force_max_clock; | 40 | ConfigurationShared::CheckState renderer_force_max_clock; |
| 40 | ConfigurationShared::CheckState use_vsync; | 41 | ConfigurationShared::CheckState use_vsync; |
| 41 | ConfigurationShared::CheckState async_astc; | 42 | ConfigurationShared::CheckState async_astc; |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index a7dbdc18c..d7ec18939 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | <x>0</x> | 7 | <x>0</x> |
| 8 | <y>0</y> | 8 | <y>0</y> |
| 9 | <width>404</width> | 9 | <width>404</width> |
| 10 | <height>321</height> | 10 | <height>376</height> |
| 11 | </rect> | 11 | </rect> |
| 12 | </property> | 12 | </property> |
| 13 | <property name="windowTitle"> | 13 | <property name="windowTitle"> |
| @@ -70,6 +70,13 @@ | |||
| 70 | </widget> | 70 | </widget> |
| 71 | </item> | 71 | </item> |
| 72 | <item> | 72 | <item> |
| 73 | <widget class="QCheckBox" name="async_present"> | ||
| 74 | <property name="text"> | ||
| 75 | <string>Enable asynchronous presentation (Vulkan only)</string> | ||
| 76 | </property> | ||
| 77 | </widget> | ||
| 78 | </item> | ||
| 79 | <item> | ||
| 73 | <widget class="QCheckBox" name="renderer_force_max_clock"> | 80 | <widget class="QCheckBox" name="renderer_force_max_clock"> |
| 74 | <property name="toolTip"> | 81 | <property name="toolTip"> |
| 75 | <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> | 82 | <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> |
| @@ -112,7 +119,7 @@ | |||
| 112 | <item> | 119 | <item> |
| 113 | <widget class="QCheckBox" name="use_fast_gpu_time"> | 120 | <widget class="QCheckBox" name="use_fast_gpu_time"> |
| 114 | <property name="toolTip"> | 121 | <property name="toolTip"> |
| 115 | <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> | 122 | <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> |
| 116 | </property> | 123 | </property> |
| 117 | <property name="text"> | 124 | <property name="text"> |
| 118 | <string>Use Fast GPU Time (Hack)</string> | 125 | <string>Use Fast GPU Time (Hack)</string> |
| @@ -122,7 +129,7 @@ | |||
| 122 | <item> | 129 | <item> |
| 123 | <widget class="QCheckBox" name="use_pessimistic_flushes"> | 130 | <widget class="QCheckBox" name="use_pessimistic_flushes"> |
| 124 | <property name="toolTip"> | 131 | <property name="toolTip"> |
| 125 | <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string> | 132 | <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string> |
| 126 | </property> | 133 | </property> |
| 127 | <property name="text"> | 134 | <property name="text"> |
| 128 | <string>Use pessimistic buffer flushes (Hack)</string> | 135 | <string>Use pessimistic buffer flushes (Hack)</string> |
| @@ -132,7 +139,7 @@ | |||
| 132 | <item> | 139 | <item> |
| 133 | <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> | 140 | <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> |
| 134 | <property name="toolTip"> | 141 | <property name="toolTip"> |
| 135 | <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> | 142 | <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> |
| 136 | </property> | 143 | </property> |
| 137 | <property name="text"> | 144 | <property name="text"> |
| 138 | <string>Use Vulkan pipeline cache</string> | 145 | <string>Use Vulkan pipeline cache</string> |
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index 6af34f793..286ccc5cd 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp | |||
| @@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() { | |||
| 111 | ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); | 111 | ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); |
| 112 | ui->device_name_edit->setText( | 112 | ui->device_name_edit->setText( |
| 113 | QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); | 113 | QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); |
| 114 | ui->use_unsafe_extended_memory_layout->setEnabled(enabled); | ||
| 115 | ui->use_unsafe_extended_memory_layout->setChecked( | ||
| 116 | Settings::values.use_unsafe_extended_memory_layout.GetValue()); | ||
| 114 | 117 | ||
| 115 | if (Settings::IsConfiguringGlobal()) { | 118 | if (Settings::IsConfiguringGlobal()) { |
| 116 | ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); | 119 | ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); |
| @@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() { | |||
| 160 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); | 163 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); |
| 161 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, | 164 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, |
| 162 | ui->combo_time_zone); | 165 | ui->combo_time_zone); |
| 166 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout, | ||
| 167 | ui->use_unsafe_extended_memory_layout, | ||
| 168 | use_unsafe_extended_memory_layout); | ||
| 163 | 169 | ||
| 164 | if (Settings::IsConfiguringGlobal()) { | 170 | if (Settings::IsConfiguringGlobal()) { |
| 165 | // Guard if during game and set to game-specific value | 171 | // Guard if during game and set to game-specific value |
| @@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() { | |||
| 215 | Settings::values.rng_seed.GetValue().has_value(), | 221 | Settings::values.rng_seed.GetValue().has_value(), |
| 216 | Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); | 222 | Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); |
| 217 | 223 | ||
| 224 | ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout, | ||
| 225 | Settings::values.use_unsafe_extended_memory_layout, | ||
| 226 | use_unsafe_extended_memory_layout); | ||
| 227 | |||
| 218 | ui->custom_rtc_checkbox->setVisible(false); | 228 | ui->custom_rtc_checkbox->setVisible(false); |
| 219 | ui->custom_rtc_edit->setVisible(false); | 229 | ui->custom_rtc_edit->setVisible(false); |
| 220 | } | 230 | } |
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index ec28724a1..ce1a91601 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h | |||
| @@ -41,6 +41,7 @@ private: | |||
| 41 | bool enabled = false; | 41 | bool enabled = false; |
| 42 | 42 | ||
| 43 | ConfigurationShared::CheckState use_rng_seed; | 43 | ConfigurationShared::CheckState use_rng_seed; |
| 44 | ConfigurationShared::CheckState use_unsafe_extended_memory_layout; | ||
| 44 | 45 | ||
| 45 | Core::System& system; | 46 | Core::System& system; |
| 46 | }; | 47 | }; |
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui index 9e7bc3b93..e0caecd5e 100644 --- a/src/yuzu/configuration/configure_system.ui +++ b/src/yuzu/configuration/configure_system.ui | |||
| @@ -478,6 +478,13 @@ | |||
| 478 | </property> | 478 | </property> |
| 479 | </widget> | 479 | </widget> |
| 480 | </item> | 480 | </item> |
| 481 | <item row="7" column="0"> | ||
| 482 | <widget class="QCheckBox" name="use_unsafe_extended_memory_layout"> | ||
| 483 | <property name="text"> | ||
| 484 | <string>Unsafe extended memory layout (8GB DRAM)</string> | ||
| 485 | </property> | ||
| 486 | </widget> | ||
| 487 | </item> | ||
| 481 | </layout> | 488 | </layout> |
| 482 | </item> | 489 | </item> |
| 483 | </layout> | 490 | </layout> |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b79409a68..ba9eece1d 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "configuration/configure_input.h" | 27 | #include "configuration/configure_input.h" |
| 28 | #include "configuration/configure_per_game.h" | 28 | #include "configuration/configure_per_game.h" |
| 29 | #include "configuration/configure_tas.h" | 29 | #include "configuration/configure_tas.h" |
| 30 | #include "core/file_sys/romfs_factory.h" | ||
| 30 | #include "core/file_sys/vfs.h" | 31 | #include "core/file_sys/vfs.h" |
| 31 | #include "core/file_sys/vfs_real.h" | 32 | #include "core/file_sys/vfs_real.h" |
| 32 | #include "core/frontend/applets/cabinet.h" | 33 | #include "core/frontend/applets/cabinet.h" |
| @@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4171 | } | 4172 | } |
| 4172 | 4173 | ||
| 4173 | Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); | 4174 | Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); |
| 4175 | bool all_keys_present{true}; | ||
| 4176 | |||
| 4174 | if (keys.BaseDeriveNecessary()) { | 4177 | if (keys.BaseDeriveNecessary()) { |
| 4175 | Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; | 4178 | Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; |
| 4176 | 4179 | ||
| @@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4195 | errors += tr(" - Missing PRODINFO"); | 4198 | errors += tr(" - Missing PRODINFO"); |
| 4196 | } | 4199 | } |
| 4197 | if (!errors.isEmpty()) { | 4200 | if (!errors.isEmpty()) { |
| 4201 | all_keys_present = false; | ||
| 4198 | QMessageBox::warning( | 4202 | QMessageBox::warning( |
| 4199 | this, tr("Derivation Components Missing"), | 4203 | this, tr("Derivation Components Missing"), |
| 4200 | tr("Encryption keys are missing. " | 4204 | tr("Encryption keys are missing. " |
| @@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) { | |||
| 4222 | 4226 | ||
| 4223 | system->GetFileSystemController().CreateFactories(*vfs); | 4227 | system->GetFileSystemController().CreateFactories(*vfs); |
| 4224 | 4228 | ||
| 4229 | if (all_keys_present && !this->CheckSystemArchiveDecryption()) { | ||
| 4230 | LOG_WARNING(Frontend, "Mii model decryption failed"); | ||
| 4231 | QMessageBox::warning( | ||
| 4232 | this, tr("System Archive Decryption Failed"), | ||
| 4233 | tr("Encryption keys failed to decrypt firmware. " | ||
| 4234 | "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu " | ||
| 4235 | "quickstart guide</a> to get all your keys, firmware and " | ||
| 4236 | "games.")); | ||
| 4237 | } | ||
| 4238 | |||
| 4225 | if (behavior == ReinitializeKeyBehavior::Warning) { | 4239 | if (behavior == ReinitializeKeyBehavior::Warning) { |
| 4226 | game_list->PopulateAsync(UISettings::values.game_dirs); | 4240 | game_list->PopulateAsync(UISettings::values.game_dirs); |
| 4227 | } | 4241 | } |
| 4228 | } | 4242 | } |
| 4229 | 4243 | ||
| 4244 | bool GMainWindow::CheckSystemArchiveDecryption() { | ||
| 4245 | constexpr u64 MiiModelId = 0x0100000000000802; | ||
| 4246 | |||
| 4247 | auto bis_system = system->GetFileSystemController().GetSystemNANDContents(); | ||
| 4248 | if (!bis_system) { | ||
| 4249 | // Not having system BIS files is not an error. | ||
| 4250 | return true; | ||
| 4251 | } | ||
| 4252 | |||
| 4253 | auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data); | ||
| 4254 | if (!mii_nca) { | ||
| 4255 | // Not having the Mii model is not an error. | ||
| 4256 | return true; | ||
| 4257 | } | ||
| 4258 | |||
| 4259 | // Return whether we are able to decrypt the RomFS of the Mii model. | ||
| 4260 | return mii_nca->GetRomFS().get() != nullptr; | ||
| 4261 | } | ||
| 4262 | |||
| 4230 | std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, | 4263 | std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, |
| 4231 | u64 program_id) { | 4264 | u64 program_id) { |
| 4232 | const auto dlc_entries = | 4265 | const auto dlc_entries = |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 8b5c1d747..3bbc31ada 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -392,6 +392,7 @@ private: | |||
| 392 | void LoadTranslation(); | 392 | void LoadTranslation(); |
| 393 | void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); | 393 | void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); |
| 394 | bool CheckDarkMode(); | 394 | bool CheckDarkMode(); |
| 395 | bool CheckSystemArchiveDecryption(); | ||
| 395 | 396 | ||
| 396 | QString GetTasStateDescription() const; | 397 | QString GetTasStateDescription() const; |
| 397 | bool CreateShortcut(const std::string& shortcut_path, const std::string& title, | 398 | bool CreateShortcut(const std::string& shortcut_path, const std::string& title, |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 464da3231..e4f91d07c 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -274,7 +274,7 @@ void Config::ReadValues() { | |||
| 274 | 274 | ||
| 275 | // Core | 275 | // Core |
| 276 | ReadSetting("Core", Settings::values.use_multi_core); | 276 | ReadSetting("Core", Settings::values.use_multi_core); |
| 277 | ReadSetting("Core", Settings::values.use_extended_memory_layout); | 277 | ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout); |
| 278 | 278 | ||
| 279 | // Cpu | 279 | // Cpu |
| 280 | ReadSetting("Cpu", Settings::values.cpu_accuracy); | 280 | ReadSetting("Cpu", Settings::values.cpu_accuracy); |
| @@ -300,6 +300,7 @@ void Config::ReadValues() { | |||
| 300 | 300 | ||
| 301 | // Renderer | 301 | // Renderer |
| 302 | ReadSetting("Renderer", Settings::values.renderer_backend); | 302 | ReadSetting("Renderer", Settings::values.renderer_backend); |
| 303 | ReadSetting("Renderer", Settings::values.async_presentation); | ||
| 303 | ReadSetting("Renderer", Settings::values.renderer_force_max_clock); | 304 | ReadSetting("Renderer", Settings::values.renderer_force_max_clock); |
| 304 | ReadSetting("Renderer", Settings::values.renderer_debug); | 305 | ReadSetting("Renderer", Settings::values.renderer_debug); |
| 305 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); | 306 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 209cfc28a..f714eae17 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -163,9 +163,9 @@ keyboard_enabled = | |||
| 163 | # 0: Disabled, 1 (default): Enabled | 163 | # 0: Disabled, 1 (default): Enabled |
| 164 | use_multi_core = | 164 | use_multi_core = |
| 165 | 165 | ||
| 166 | # Enable extended guest system memory layout (8GB DRAM) | 166 | # Enable unsafe extended guest system memory layout (8GB DRAM) |
| 167 | # 0 (default): Disabled, 1: Enabled | 167 | # 0 (default): Disabled, 1: Enabled |
| 168 | use_extended_memory_layout = | 168 | use_unsafe_extended_memory_layout = |
| 169 | 169 | ||
| 170 | [Cpu] | 170 | [Cpu] |
| 171 | # Adjusts various optimizations. | 171 | # Adjusts various optimizations. |
| @@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor = | |||
| 264 | # 0: OpenGL, 1 (default): Vulkan | 264 | # 0: OpenGL, 1 (default): Vulkan |
| 265 | backend = | 265 | backend = |
| 266 | 266 | ||
| 267 | # Whether to enable asynchronous presentation (Vulkan only) | ||
| 268 | # 0 (default): Off, 1: On | ||
| 269 | async_presentation = | ||
| 270 | |||
| 267 | # Enable graphics API debugging mode. | 271 | # Enable graphics API debugging mode. |
| 268 | # 0 (default): Disabled, 1: Enabled | 272 | # 0 (default): Disabled, 1: Enabled |
| 269 | debug = | 273 | debug = |