Diffstat (limited to 'src')
-rw-r--r--  src/CMakeLists.txt | 3
-rw-r--r--  src/common/intrusive_list.h | 631
-rw-r--r--  src/common/settings.cpp | 4
-rw-r--r--  src/common/settings.h | 4
-rw-r--r--  src/core/core.cpp | 6
-rw-r--r--  src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp | 7
-rw-r--r--  src/core/hle/kernel/k_event_info.h | 5
-rw-r--r--  src/core/hle/kernel/k_object_name.h | 8
-rw-r--r--  src/core/hle/kernel/k_server_port.h | 4
-rw-r--r--  src/core/hle/kernel/k_server_session.h | 7
-rw-r--r--  src/core/hle/kernel/k_session_request.h | 4
-rw-r--r--  src/core/hle/kernel/k_shared_memory_info.h | 4
-rw-r--r--  src/core/hle/kernel/k_thread.h | 13
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 3
-rw-r--r--  src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | 3
-rw-r--r--  src/tests/CMakeLists.txt | 2
-rw-r--r--  src/tests/video_core/buffer_base.cpp | 549
-rw-r--r--  src/tests/video_core/memory_tracker.cpp | 549
-rw-r--r--  src/video_core/CMakeLists.txt | 7
-rw-r--r--  src/video_core/buffer_cache/buffer_base.h | 518
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.cpp | 4
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 1002
-rw-r--r--  src/video_core/buffer_cache/buffer_cache_base.h | 580
-rw-r--r--  src/video_core/buffer_cache/memory_tracker_base.h | 273
-rw-r--r--  src/video_core/buffer_cache/word_manager.h | 462
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache_base.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 51
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 2
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 224
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_present_manager.cpp | 457
-rw-r--r--  src/video_core/renderer_vulkan/vk_present_manager.h | 83
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 9
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 6
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 49
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 31
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 11
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 10
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 2
-rw-r--r--  src/video_core/vulkan_common/vulkan_device.cpp | 4
-rw-r--r--  src/yuzu/configuration/config.cpp | 6
-rw-r--r--  src/yuzu/configuration/configure_general.cpp | 9
-rw-r--r--  src/yuzu/configuration/configure_general.h | 1
-rw-r--r--  src/yuzu/configuration/configure_general.ui | 7
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.cpp | 7
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.h | 1
-rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.ui | 15
-rw-r--r--  src/yuzu/configuration/configure_system.cpp | 10
-rw-r--r--  src/yuzu/configuration/configure_system.h | 1
-rw-r--r--  src/yuzu/configuration/configure_system.ui | 7
-rw-r--r--  src/yuzu/main.cpp | 33
-rw-r--r--  src/yuzu/main.h | 1
-rw-r--r--  src/yuzu_cmd/config.cpp | 3
-rw-r--r--  src/yuzu_cmd/default_ini.h | 8
59 files changed, 3775 insertions(+), 2006 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 312a49f42..5e3a74c0f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -113,6 +113,9 @@ else()
 
         $<$<CXX_COMPILER_ID:Clang>:-Wno-braced-scalar-init>
         $<$<CXX_COMPILER_ID:Clang>:-Wno-unused-private-field>
+        $<$<CXX_COMPILER_ID:Clang>:-Werror=shadow-uncaptured-local>
+        $<$<CXX_COMPILER_ID:Clang>:-Werror=implicit-fallthrough>
+        $<$<CXX_COMPILER_ID:Clang>:-Werror=type-limits>
         $<$<CXX_COMPILER_ID:AppleClang>:-Wno-braced-scalar-init>
         $<$<CXX_COMPILER_ID:AppleClang>:-Wno-unused-private-field>
     )
diff --git a/src/common/intrusive_list.h b/src/common/intrusive_list.h
new file mode 100644
index 000000000..d330dc1c2
--- /dev/null
+++ b/src/common/intrusive_list.h
@@ -0,0 +1,631 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_funcs.h"
+#include "common/parent_of_member.h"
+
+namespace Common {
+
+// Forward declare implementation class for Node.
+namespace impl {
+
+class IntrusiveListImpl;
+
+}
+
+class IntrusiveListNode {
+    YUZU_NON_COPYABLE(IntrusiveListNode);
+
+private:
+    friend class impl::IntrusiveListImpl;
+
+    IntrusiveListNode* m_prev;
+    IntrusiveListNode* m_next;
+
+public:
+    constexpr IntrusiveListNode() : m_prev(this), m_next(this) {}
+
+    constexpr bool IsLinked() const {
+        return m_next != this;
+    }
+
+private:
+    constexpr void LinkPrev(IntrusiveListNode* node) {
+        // We can't link an already linked node.
+        ASSERT(!node->IsLinked());
+        this->SplicePrev(node, node);
+    }
+
+    constexpr void SplicePrev(IntrusiveListNode* first, IntrusiveListNode* last) {
+        // Splice a range into the list.
+        auto last_prev = last->m_prev;
+        first->m_prev = m_prev;
+        last_prev->m_next = this;
+        m_prev->m_next = first;
+        m_prev = last_prev;
+    }
+
+    constexpr void LinkNext(IntrusiveListNode* node) {
+        // We can't link an already linked node.
+        ASSERT(!node->IsLinked());
+        return this->SpliceNext(node, node);
+    }
+
+    constexpr void SpliceNext(IntrusiveListNode* first, IntrusiveListNode* last) {
+        // Splice a range into the list.
+        auto last_prev = last->m_prev;
+        first->m_prev = this;
+        last_prev->m_next = m_next;
+        m_next->m_prev = last_prev;
+        m_next = first;
+    }
+
+    constexpr void Unlink() {
+        this->Unlink(m_next);
+    }
+
+    constexpr void Unlink(IntrusiveListNode* last) {
+        // Unlink a node from a next node.
+        auto last_prev = last->m_prev;
+        m_prev->m_next = last;
+        last->m_prev = m_prev;
+        last_prev->m_next = this;
+        m_prev = last_prev;
+    }
+
+    constexpr IntrusiveListNode* GetPrev() {
+        return m_prev;
+    }
+
+    constexpr const IntrusiveListNode* GetPrev() const {
+        return m_prev;
+    }
+
+    constexpr IntrusiveListNode* GetNext() {
+        return m_next;
+    }
+
+    constexpr const IntrusiveListNode* GetNext() const {
+        return m_next;
+    }
+};
+// DEPRECATED: static_assert(std::is_literal_type<IntrusiveListNode>::value);
+
+namespace impl {
+
+class IntrusiveListImpl {
+    YUZU_NON_COPYABLE(IntrusiveListImpl);
+
+private:
+    IntrusiveListNode m_root_node;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = IntrusiveListNode;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveListImpl::value_type;
+        using difference_type = typename IntrusiveListImpl::difference_type;
+        using pointer =
+            std::conditional_t<Const, IntrusiveListImpl::const_pointer, IntrusiveListImpl::pointer>;
+        using reference = std::conditional_t<Const, IntrusiveListImpl::const_reference,
+                                             IntrusiveListImpl::reference>;
+
+    private:
+        pointer m_node;
+
+    public:
+        constexpr explicit Iterator(pointer n) : m_node(n) {}
+
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_node == rhs.m_node;
+        }
+
+        constexpr pointer operator->() const {
+            return m_node;
+        }
+
+        constexpr reference operator*() const {
+            return *m_node;
+        }
+
+        constexpr Iterator& operator++() {
+            m_node = m_node->m_next;
+            return *this;
+        }
+
+        constexpr Iterator& operator--() {
+            m_node = m_node->m_prev;
+            return *this;
+        }
+
+        constexpr Iterator operator++(int) {
+            const Iterator it{*this};
+            ++(*this);
+            return it;
+        }
+
+        constexpr Iterator operator--(int) {
+            const Iterator it{*this};
+            --(*this);
+            return it;
+        }
+
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_node);
+        }
+
+        constexpr Iterator<false> GetNonConstIterator() const {
+            return Iterator<false>(const_cast<IntrusiveListImpl::pointer>(m_node));
+        }
+    };
+
+public:
+    constexpr IntrusiveListImpl() : m_root_node() {}
+
+    // Iterator accessors.
+    constexpr iterator begin() {
+        return iterator(m_root_node.GetNext());
+    }
+
+    constexpr const_iterator begin() const {
+        return const_iterator(m_root_node.GetNext());
+    }
+
+    constexpr iterator end() {
+        return iterator(std::addressof(m_root_node));
+    }
+
+    constexpr const_iterator end() const {
+        return const_iterator(std::addressof(m_root_node));
+    }
+
+    constexpr iterator iterator_to(reference v) {
+        // Only allow iterator_to for values in lists.
+        ASSERT(v.IsLinked());
+        return iterator(std::addressof(v));
+    }
+
+    constexpr const_iterator iterator_to(const_reference v) const {
+        // Only allow iterator_to for values in lists.
+        ASSERT(v.IsLinked());
+        return const_iterator(std::addressof(v));
+    }
+
+    // Content management.
+    constexpr bool empty() const {
+        return !m_root_node.IsLinked();
+    }
+
+    constexpr size_type size() const {
+        return static_cast<size_type>(std::distance(this->begin(), this->end()));
+    }
+
+    constexpr reference back() {
+        return *m_root_node.GetPrev();
+    }
+
+    constexpr const_reference back() const {
+        return *m_root_node.GetPrev();
+    }
+
+    constexpr reference front() {
+        return *m_root_node.GetNext();
+    }
+
+    constexpr const_reference front() const {
+        return *m_root_node.GetNext();
+    }
+
+    constexpr void push_back(reference node) {
+        m_root_node.LinkPrev(std::addressof(node));
+    }
+
+    constexpr void push_front(reference node) {
+        m_root_node.LinkNext(std::addressof(node));
+    }
+
+    constexpr void pop_back() {
+        m_root_node.GetPrev()->Unlink();
+    }
+
+    constexpr void pop_front() {
+        m_root_node.GetNext()->Unlink();
+    }
+
+    constexpr iterator insert(const_iterator pos, reference node) {
+        pos.GetNonConstIterator()->LinkPrev(std::addressof(node));
+        return iterator(std::addressof(node));
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o) {
+        splice_impl(pos, o.begin(), o.end());
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first) {
+        const_iterator last(first);
+        std::advance(last, 1);
+        splice_impl(pos, first, last);
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveListImpl& o, const_iterator first,
+                          const_iterator last) {
+        splice_impl(pos, first, last);
+    }
+
+    constexpr iterator erase(const_iterator pos) {
+        if (pos == this->end()) {
+            return this->end();
+        }
+        iterator it(pos.GetNonConstIterator());
+        (it++)->Unlink();
+        return it;
+    }
+
+    constexpr void clear() {
+        while (!this->empty()) {
+            this->pop_front();
+        }
+    }
+
+private:
+    constexpr void splice_impl(const_iterator _pos, const_iterator _first, const_iterator _last) {
+        if (_first == _last) {
+            return;
+        }
+        iterator pos(_pos.GetNonConstIterator());
+        iterator first(_first.GetNonConstIterator());
+        iterator last(_last.GetNonConstIterator());
+        first->Unlink(std::addressof(*last));
+        pos->SplicePrev(std::addressof(*first), std::addressof(*first));
+    }
+};
+
+} // namespace impl
+
+template <class T, class Traits>
+class IntrusiveList {
+    YUZU_NON_COPYABLE(IntrusiveList);
+
+private:
+    impl::IntrusiveListImpl m_impl;
+
+public:
+    template <bool Const>
+    class Iterator;
+
+    using value_type = T;
+    using size_type = size_t;
+    using difference_type = ptrdiff_t;
+    using pointer = value_type*;
+    using const_pointer = const value_type*;
+    using reference = value_type&;
+    using const_reference = const value_type&;
+    using iterator = Iterator<false>;
+    using const_iterator = Iterator<true>;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    template <bool Const>
+    class Iterator {
+    public:
+        friend class Common::IntrusiveList<T, Traits>;
+
+        using ImplIterator =
+            std::conditional_t<Const, Common::impl::IntrusiveListImpl::const_iterator,
+                               Common::impl::IntrusiveListImpl::iterator>;
+
+        using iterator_category = std::bidirectional_iterator_tag;
+        using value_type = typename IntrusiveList::value_type;
+        using difference_type = typename IntrusiveList::difference_type;
+        using pointer =
+            std::conditional_t<Const, IntrusiveList::const_pointer, IntrusiveList::pointer>;
+        using reference =
+            std::conditional_t<Const, IntrusiveList::const_reference, IntrusiveList::reference>;
+
+    private:
+        ImplIterator m_iterator;
+
+    private:
+        constexpr explicit Iterator(ImplIterator it) : m_iterator(it) {}
+
+        constexpr ImplIterator GetImplIterator() const {
+            return m_iterator;
+        }
+
+    public:
+        constexpr bool operator==(const Iterator& rhs) const {
+            return m_iterator == rhs.m_iterator;
+        }
+
+        constexpr pointer operator->() const {
+            return std::addressof(Traits::GetParent(*m_iterator));
+        }
+
+        constexpr reference operator*() const {
+            return Traits::GetParent(*m_iterator);
+        }
+
+        constexpr Iterator& operator++() {
+            ++m_iterator;
+            return *this;
+        }
+
+        constexpr Iterator& operator--() {
+            --m_iterator;
+            return *this;
+        }
+
+        constexpr Iterator operator++(int) {
+            const Iterator it{*this};
+            ++m_iterator;
+            return it;
+        }
+
+        constexpr Iterator operator--(int) {
+            const Iterator it{*this};
+            --m_iterator;
+            return it;
+        }
+
+        constexpr operator Iterator<true>() const {
+            return Iterator<true>(m_iterator);
+        }
+    };
+
+private:
+    static constexpr IntrusiveListNode& GetNode(reference ref) {
+        return Traits::GetNode(ref);
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(const_reference ref) {
+        return Traits::GetNode(ref);
+    }
+
+    static constexpr reference GetParent(IntrusiveListNode& node) {
+        return Traits::GetParent(node);
+    }
+
+    static constexpr const_reference GetParent(IntrusiveListNode const& node) {
+        return Traits::GetParent(node);
+    }
+
+public:
+    constexpr IntrusiveList() : m_impl() {}
+
+    // Iterator accessors.
+    constexpr iterator begin() {
+        return iterator(m_impl.begin());
+    }
+
+    constexpr const_iterator begin() const {
+        return const_iterator(m_impl.begin());
+    }
+
+    constexpr iterator end() {
+        return iterator(m_impl.end());
+    }
+
+    constexpr const_iterator end() const {
+        return const_iterator(m_impl.end());
+    }
+
+    constexpr const_iterator cbegin() const {
+        return this->begin();
+    }
+
+    constexpr const_iterator cend() const {
+        return this->end();
+    }
+
+    constexpr reverse_iterator rbegin() {
+        return reverse_iterator(this->end());
+    }
+
+    constexpr const_reverse_iterator rbegin() const {
+        return const_reverse_iterator(this->end());
+    }
+
+    constexpr reverse_iterator rend() {
+        return reverse_iterator(this->begin());
+    }
+
+    constexpr const_reverse_iterator rend() const {
+        return const_reverse_iterator(this->begin());
+    }
+
+    constexpr const_reverse_iterator crbegin() const {
+        return this->rbegin();
+    }
+
+    constexpr const_reverse_iterator crend() const {
+        return this->rend();
+    }
+
+    constexpr iterator iterator_to(reference v) {
+        return iterator(m_impl.iterator_to(GetNode(v)));
+    }
+
+    constexpr const_iterator iterator_to(const_reference v) const {
+        return const_iterator(m_impl.iterator_to(GetNode(v)));
+    }
+
+    // Content management.
+    constexpr bool empty() const {
+        return m_impl.empty();
+    }
+
+    constexpr size_type size() const {
+        return m_impl.size();
+    }
+
+    constexpr reference back() {
+        return GetParent(m_impl.back());
+    }
+
+    constexpr const_reference back() const {
+        return GetParent(m_impl.back());
+    }
+
+    constexpr reference front() {
+        return GetParent(m_impl.front());
+    }
+
+    constexpr const_reference front() const {
+        return GetParent(m_impl.front());
+    }
+
+    constexpr void push_back(reference ref) {
+        m_impl.push_back(GetNode(ref));
+    }
+
+    constexpr void push_front(reference ref) {
+        m_impl.push_front(GetNode(ref));
+    }
+
+    constexpr void pop_back() {
+        m_impl.pop_back();
+    }
+
+    constexpr void pop_front() {
+        m_impl.pop_front();
+    }
+
+    constexpr iterator insert(const_iterator pos, reference ref) {
+        return iterator(m_impl.insert(pos.GetImplIterator(), GetNode(ref)));
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl);
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator());
+    }
+
+    constexpr void splice(const_iterator pos, IntrusiveList& o, const_iterator first,
+                          const_iterator last) {
+        m_impl.splice(pos.GetImplIterator(), o.m_impl, first.GetImplIterator(),
+                      last.GetImplIterator());
+    }
+
+    constexpr iterator erase(const_iterator pos) {
+        return iterator(m_impl.erase(pos.GetImplIterator()));
+    }
+
+    constexpr void clear() {
+        m_impl.clear();
+    }
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraits;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraits<Member, Derived> {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListMemberTraits>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return parent.*Member;
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return parent.*Member;
+    }
+
+    static Derived& GetParent(IntrusiveListNode& node) {
+        return Common::GetParentReference<Member, Derived>(std::addressof(node));
+    }
+
+    static Derived const& GetParent(IntrusiveListNode const& node) {
+        return Common::GetParentReference<Member, Derived>(std::addressof(node));
+    }
+};
+
+template <auto T, class Derived = Common::impl::GetParentType<T>>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf;
+
+template <class Parent, IntrusiveListNode Parent::*Member, class Derived>
+class IntrusiveListMemberTraitsByNonConstexprOffsetOf<Member, Derived> {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListMemberTraitsByNonConstexprOffsetOf>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return parent.*Member;
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return parent.*Member;
+    }
+
+    static Derived& GetParent(IntrusiveListNode& node) {
+        return *reinterpret_cast<Derived*>(reinterpret_cast<char*>(std::addressof(node)) -
+                                           GetOffset());
+    }
+
+    static Derived const& GetParent(IntrusiveListNode const& node) {
+        return *reinterpret_cast<const Derived*>(
+            reinterpret_cast<const char*>(std::addressof(node)) - GetOffset());
+    }
+
+    static uintptr_t GetOffset() {
+        return reinterpret_cast<uintptr_t>(std::addressof(reinterpret_cast<Derived*>(0)->*Member));
+    }
+};
+
+template <class Derived>
+class IntrusiveListBaseNode : public IntrusiveListNode {};
+
+template <class Derived>
+class IntrusiveListBaseTraits {
+public:
+    using ListType = IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+private:
+    friend class IntrusiveList<Derived, IntrusiveListBaseTraits>;
+
+    static constexpr IntrusiveListNode& GetNode(Derived& parent) {
+        return static_cast<IntrusiveListNode&>(
+            static_cast<IntrusiveListBaseNode<Derived>&>(parent));
+    }
+
+    static constexpr IntrusiveListNode const& GetNode(Derived const& parent) {
+        return static_cast<const IntrusiveListNode&>(
+            static_cast<const IntrusiveListBaseNode<Derived>&>(parent));
+    }
+
+    static constexpr Derived& GetParent(IntrusiveListNode& node) {
+        return static_cast<Derived&>(static_cast<IntrusiveListBaseNode<Derived>&>(node));
+    }
+
+    static constexpr Derived const& GetParent(IntrusiveListNode const& node) {
+        return static_cast<const Derived&>(
+            static_cast<const IntrusiveListBaseNode<Derived>&>(node));
+    }
+};
+
+} // namespace Common
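
[Usage sketch, not part of the commit: the kernel headers patched below adopt this container by deriving from the base-node hook. A minimal, self-contained example of that pattern follows; the Job type, JobList alias, and Example() function are hypothetical.

    #include "common/intrusive_list.h"

    // An element carries its own link node by deriving from the base-node hook.
    struct Job : public Common::IntrusiveListBaseNode<Job> {
        int id{};
    };

    // The traits type names the list that links Jobs through that hook.
    using JobList = Common::IntrusiveListBaseTraits<Job>::ListType;

    void Example() {
        Job a{}, b{};
        JobList list;
        list.push_back(a); // Links the existing node; the list never allocates.
        list.push_back(b);
        for (Job& job : list) {
            job.id += 1; // Iteration recovers Job& from each node via the traits.
        }
        list.clear(); // Unlinks every node; elements must outlive their membership.
    }
]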
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 84955030b..cb1bca467 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -45,6 +45,7 @@ void LogSettings() {
     log_setting("System_LanguageIndex", values.language_index.GetValue());
     log_setting("System_RegionIndex", values.region_index.GetValue());
     log_setting("System_TimeZoneIndex", values.time_zone_index.GetValue());
+    log_setting("System_UnsafeMemoryLayout", values.use_unsafe_extended_memory_layout.GetValue());
     log_setting("Core_UseMultiCore", values.use_multi_core.GetValue());
     log_setting("CPU_Accuracy", values.cpu_accuracy.GetValue());
     log_setting("Renderer_UseResolutionScaling", values.resolution_setup.GetValue());
@@ -191,7 +192,7 @@ void RestoreGlobalState(bool is_powered_on) {
 
     // Core
     values.use_multi_core.SetGlobal(true);
-    values.use_extended_memory_layout.SetGlobal(true);
+    values.use_unsafe_extended_memory_layout.SetGlobal(true);
 
     // CPU
    values.cpu_accuracy.SetGlobal(true);
@@ -205,6 +206,7 @@ void RestoreGlobalState(bool is_powered_on) {
     // Renderer
     values.fsr_sharpening_slider.SetGlobal(true);
     values.renderer_backend.SetGlobal(true);
+    values.async_presentation.SetGlobal(true);
     values.renderer_force_max_clock.SetGlobal(true);
     values.vulkan_device.SetGlobal(true);
     values.fullscreen_mode.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index b77a1580a..adebb0ca7 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -388,7 +388,8 @@ struct Values {
 
     // Core
     SwitchableSetting<bool> use_multi_core{true, "use_multi_core"};
-    SwitchableSetting<bool> use_extended_memory_layout{false, "use_extended_memory_layout"};
+    SwitchableSetting<bool> use_unsafe_extended_memory_layout{false,
+                                                              "use_unsafe_extended_memory_layout"};
 
     // Cpu
     SwitchableSetting<CPUAccuracy, true> cpu_accuracy{CPUAccuracy::Auto, CPUAccuracy::Auto,
@@ -422,6 +423,7 @@ struct Values {
     // Renderer
     SwitchableSetting<RendererBackend, true> renderer_backend{
         RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
+    SwitchableSetting<bool> async_presentation{false, "async_presentation"};
     SwitchableSetting<bool> renderer_force_max_clock{false, "force_max_clock"};
     Setting<bool> renderer_debug{false, "debug"};
     Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
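
[Illustrative sketch, not part of the commit: like the other SwitchableSettings, the new async_presentation toggle is read at its call sites through GetValue(), as the core.cpp hunks below do for the renamed memory-layout setting. The variable and branch here are hypothetical.

    // Hypothetical call site: choose the presentation path from the new setting.
    const bool use_async_presentation = Settings::values.async_presentation.GetValue();
    if (use_async_presentation) {
        // Hand finished frames to a dedicated present thread instead of
        // presenting them inline on the GPU thread.
    }
]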
diff --git a/src/core/core.cpp b/src/core/core.cpp
index d7bf2bf51..06fba4ce5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -137,7 +137,7 @@ struct System::Impl {
         device_memory = std::make_unique<Core::DeviceMemory>();
 
         is_multicore = Settings::values.use_multi_core.GetValue();
-        extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
+        extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
 
         core_timing.SetMulticore(is_multicore);
         core_timing.Initialize([&system]() { system.RegisterHostThread(); });
@@ -169,7 +169,7 @@ struct System::Impl {
     void ReinitializeIfNecessary(System& system) {
         const bool must_reinitialize =
             is_multicore != Settings::values.use_multi_core.GetValue() ||
-            extended_memory_layout != Settings::values.use_extended_memory_layout.GetValue();
+            extended_memory_layout != Settings::values.use_unsafe_extended_memory_layout.GetValue();
 
         if (!must_reinitialize) {
             return;
@@ -178,7 +178,7 @@ struct System::Impl {
         LOG_DEBUG(Kernel, "Re-initializing");
 
         is_multicore = Settings::values.use_multi_core.GetValue();
-        extended_memory_layout = Settings::values.use_extended_memory_layout.GetValue();
+        extended_memory_layout = Settings::values.use_unsafe_extended_memory_layout.GetValue();
 
         Initialize(system);
     }
diff --git a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
index 36d0d20d2..49bdc671e 100644
--- a/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
+++ b/src/core/hle/kernel/board/nintendo/nx/k_system_control.cpp
@@ -35,12 +35,13 @@ namespace {
 using namespace Common::Literals;
 
 u32 GetMemorySizeForInit() {
-    return Settings::values.use_extended_memory_layout ? Smc::MemorySize_8GB : Smc::MemorySize_4GB;
+    return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemorySize_8GB
+                                                              : Smc::MemorySize_4GB;
 }
 
 Smc::MemoryArrangement GetMemoryArrangeForInit() {
-    return Settings::values.use_extended_memory_layout ? Smc::MemoryArrangement_8GB
-                                                       : Smc::MemoryArrangement_4GB;
+    return Settings::values.use_unsafe_extended_memory_layout ? Smc::MemoryArrangement_8GB
+                                                              : Smc::MemoryArrangement_4GB;
 }
 } // namespace
 
diff --git a/src/core/hle/kernel/k_event_info.h b/src/core/hle/kernel/k_event_info.h
index 25b3ff594..eacfa5dc6 100644
--- a/src/core/hle/kernel/k_event_info.h
+++ b/src/core/hle/kernel/k_event_info.h
@@ -5,14 +5,15 @@
 
 #include <array>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/slab_helpers.h"
 #include "core/hle/kernel/svc_types.h"
 
 namespace Kernel {
 
-class KEventInfo : public KSlabAllocated<KEventInfo>, public boost::intrusive::list_base_hook<> {
+class KEventInfo : public KSlabAllocated<KEventInfo>,
+                   public Common::IntrusiveListBaseNode<KEventInfo> {
 public:
     struct InfoCreateThread {
         u32 thread_id{};
diff --git a/src/core/hle/kernel/k_object_name.h b/src/core/hle/kernel/k_object_name.h
index 2d97fc777..a8876fe37 100644
--- a/src/core/hle/kernel/k_object_name.h
+++ b/src/core/hle/kernel/k_object_name.h
@@ -5,7 +5,8 @@
 
 #include <array>
 #include <memory>
-#include <boost/intrusive/list.hpp>
+
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/slab_helpers.h"
@@ -15,13 +16,14 @@ namespace Kernel {
 
 class KObjectNameGlobalData;
 
-class KObjectName : public KSlabAllocated<KObjectName>, public boost::intrusive::list_base_hook<> {
+class KObjectName : public KSlabAllocated<KObjectName>,
+                    public Common::IntrusiveListBaseNode<KObjectName> {
 public:
     explicit KObjectName(KernelCore&) {}
     virtual ~KObjectName() = default;
 
     static constexpr size_t NameLengthMax = 12;
-    using List = boost::intrusive::list<KObjectName>;
+    using List = Common::IntrusiveListBaseTraits<KObjectName>::ListType;
 
     static Result NewFromName(KernelCore& kernel, KAutoObject* obj, const char* name);
     static Result Delete(KernelCore& kernel, KAutoObject* obj, const char* name);
diff --git a/src/core/hle/kernel/k_server_port.h b/src/core/hle/kernel/k_server_port.h
index 21c040e62..625280290 100644
--- a/src/core/hle/kernel/k_server_port.h
+++ b/src/core/hle/kernel/k_server_port.h
@@ -7,7 +7,7 @@
 #include <string>
 #include <utility>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_server_session.h"
 #include "core/hle/kernel/k_synchronization_object.h"
@@ -42,7 +42,7 @@ public:
     bool IsSignaled() const override;
 
 private:
-    using SessionList = boost::intrusive::list<KServerSession>;
+    using SessionList = Common::IntrusiveListBaseTraits<KServerSession>::ListType;
 
     void CleanupSessions();
 
diff --git a/src/core/hle/kernel/k_server_session.h b/src/core/hle/kernel/k_server_session.h
index 5ee02f556..403891919 100644
--- a/src/core/hle/kernel/k_server_session.h
+++ b/src/core/hle/kernel/k_server_session.h
@@ -8,7 +8,7 @@
 #include <string>
 #include <utility>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/k_light_lock.h"
 #include "core/hle/kernel/k_session_request.h"
@@ -27,7 +27,8 @@ class KSession;
 class KThread;
 
 class KServerSession final : public KSynchronizationObject,
-                             public boost::intrusive::list_base_hook<> {
+                             public Common::IntrusiveListBaseNode<KServerSession> {
     KERNEL_AUTOOBJECT_TRAITS(KServerSession, KSynchronizationObject);
 
     friend class ServiceThread;
@@ -67,7 +67,8 @@ private:
     KSession* m_parent{};
 
     /// List of threads which are pending a reply.
-    boost::intrusive::list<KSessionRequest> m_request_list{};
+    using RequestList = Common::IntrusiveListBaseTraits<KSessionRequest>::ListType;
+    RequestList m_request_list{};
     KSessionRequest* m_current_request{};
 
     KLightLock m_lock;
diff --git a/src/core/hle/kernel/k_session_request.h b/src/core/hle/kernel/k_session_request.h
index b5f04907b..283669e0a 100644
--- a/src/core/hle/kernel/k_session_request.h
+++ b/src/core/hle/kernel/k_session_request.h
@@ -5,6 +5,8 @@
 
 #include <array>
 
+#include "common/intrusive_list.h"
+
 #include "core/hle/kernel/k_auto_object.h"
 #include "core/hle/kernel/k_event.h"
 #include "core/hle/kernel/k_memory_block.h"
@@ -16,7 +18,7 @@ namespace Kernel {
 
 class KSessionRequest final : public KSlabAllocated<KSessionRequest>,
                               public KAutoObject,
-                              public boost::intrusive::list_base_hook<> {
+                              public Common::IntrusiveListBaseNode<KSessionRequest> {
     KERNEL_AUTOOBJECT_TRAITS(KSessionRequest, KAutoObject);
 
 public:
diff --git a/src/core/hle/kernel/k_shared_memory_info.h b/src/core/hle/kernel/k_shared_memory_info.h
index 75b73ba39..2d8ff20d6 100644
--- a/src/core/hle/kernel/k_shared_memory_info.h
+++ b/src/core/hle/kernel/k_shared_memory_info.h
@@ -3,7 +3,7 @@
 
 #pragma once
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "core/hle/kernel/slab_helpers.h"
 
@@ -12,7 +12,7 @@ namespace Kernel {
 class KSharedMemory;
 
 class KSharedMemoryInfo final : public KSlabAllocated<KSharedMemoryInfo>,
-                                public boost::intrusive::list_base_hook<> {
+                                public Common::IntrusiveListBaseNode<KSharedMemoryInfo> {
 
 public:
     explicit KSharedMemoryInfo(KernelCore&) {}
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index 9c1a41128..f9814ac8f 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -12,7 +12,7 @@
 #include <utility>
 #include <vector>
 
-#include <boost/intrusive/list.hpp>
+#include "common/intrusive_list.h"
 
 #include "common/intrusive_red_black_tree.h"
 #include "common/spin_lock.h"
@@ -119,7 +119,7 @@ s32 GetCurrentCoreId(KernelCore& kernel);
 Core::Memory::Memory& GetCurrentMemory(KernelCore& kernel);
 
 class KThread final : public KAutoObjectWithSlabHeapAndContainer<KThread, KWorkerTask>,
-                      public boost::intrusive::list_base_hook<>,
+                      public Common::IntrusiveListBaseNode<KThread>,
                       public KTimerTask {
     KERNEL_AUTOOBJECT_TRAITS(KThread, KSynchronizationObject);
 
@@ -138,7 +138,7 @@ public:
 public:
     using ThreadContext32 = Core::ARM_Interface::ThreadContext32;
     using ThreadContext64 = Core::ARM_Interface::ThreadContext64;
-    using WaiterList = boost::intrusive::list<KThread>;
+    using WaiterList = Common::IntrusiveListBaseTraits<KThread>::ListType;
 
     /**
      * Gets the thread's current priority
@@ -750,8 +750,9 @@ private:
         ConditionVariableThreadTreeTraits::TreeType<LockWithPriorityInheritanceComparator>;
 
 public:
-    class LockWithPriorityInheritanceInfo : public KSlabAllocated<LockWithPriorityInheritanceInfo>,
-                                            public boost::intrusive::list_base_hook<> {
+    class LockWithPriorityInheritanceInfo
+        : public KSlabAllocated<LockWithPriorityInheritanceInfo>,
+          public Common::IntrusiveListBaseNode<LockWithPriorityInheritanceInfo> {
     public:
         explicit LockWithPriorityInheritanceInfo(KernelCore&) {}
 
@@ -839,7 +840,7 @@ public:
 
 private:
     using LockWithPriorityInheritanceInfoList =
-        boost::intrusive::list<LockWithPriorityInheritanceInfo>;
+        Common::IntrusiveListBaseTraits<LockWithPriorityInheritanceInfo>::ListType;
 
     ConditionVariableThreadTree* m_condvar_tree{};
     u64 m_condvar_key{};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 0cd87a48f..fee510f7b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -473,7 +473,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value) {
 }
 
 void EmitSetSampleMask(EmitContext& ctx, Id value) {
-    ctx.OpStore(ctx.sample_mask, value);
+    const Id pointer{ctx.OpAccessChain(ctx.output_u32, ctx.sample_mask, ctx.u32_zero_value)};
+    ctx.OpStore(pointer, value);
 }
 
 void EmitSetFragDepth(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index d48d4860e..47739794f 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1572,7 +1572,8 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
         Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
     }
     if (info.stores_sample_mask) {
-        sample_mask = DefineOutput(*this, U32[1], std::nullopt);
+        const Id array_type{TypeArray(U32[1], Const(1U))};
+        sample_mask = DefineOutput(*this, array_type, std::nullopt);
         Decorate(sample_mask, spv::Decoration::BuiltIn, spv::BuiltIn::SampleMask);
     }
     break;
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 39b774c98..1e158f375 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_executable(tests
     core/core_timing.cpp
     core/internal_network/network.cpp
     precompiled_headers.h
-    video_core/buffer_base.cpp
+    video_core/memory_tracker.cpp
     input_common/calibration_configuration_job.cpp
 )
 
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
deleted file mode 100644
index 734dbf4b6..000000000
--- a/src/tests/video_core/buffer_base.cpp
+++ /dev/null
@@ -1,549 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stdexcept>
5#include <unordered_map>
6
7#include <catch2/catch_test_macros.hpp>
8
9#include "common/alignment.h"
10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_base.h"
12
13namespace {
14using VideoCommon::BufferBase;
15using Range = std::pair<u64, u64>;
16
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
19
20constexpr VAddr c = 0x1328914000;
21
22class RasterizerInterface {
23public:
24 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
25 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
26 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
27 Core::Memory::YUZU_PAGEBITS};
28 for (u64 page = page_start; page < page_end; ++page) {
29 int& value = page_table[page];
30 value += delta;
31 if (value < 0) {
32 throw std::logic_error{"negative page"};
33 }
34 if (value == 0) {
35 page_table.erase(page);
36 }
37 }
38 }
39
40 [[nodiscard]] int Count(VAddr addr) const noexcept {
41 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
42 return it == page_table.end() ? 0 : it->second;
43 }
44
45 [[nodiscard]] unsigned Count() const noexcept {
46 unsigned count = 0;
47 for (const auto& [index, value] : page_table) {
48 count += value;
49 }
50 return count;
51 }
52
53private:
54 std::unordered_map<u64, int> page_table;
55};
56} // Anonymous namespace
57
58TEST_CASE("BufferBase: Small buffer", "[video_core]") {
59 RasterizerInterface rasterizer;
60 BufferBase buffer(rasterizer, c, WORD);
61 REQUIRE(rasterizer.Count() == 0);
62 buffer.UnmarkRegionAsCpuModified(c, WORD);
63 REQUIRE(rasterizer.Count() == WORD / PAGE);
64 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0});
65
66 buffer.MarkRegionAsCpuModified(c + PAGE, 1);
67 REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2});
68}
69
70TEST_CASE("BufferBase: Large buffer", "[video_core]") {
71 RasterizerInterface rasterizer;
72 BufferBase buffer(rasterizer, c, WORD * 32);
73 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
74 buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4);
75 REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2});
76 REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8});
77 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE});
78 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE});
79 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
80 Range{WORD * 3 + PAGE * 63, WORD * 4});
81
82 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
83 buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
84 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
85 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9});
86
87 buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
88 REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) ==
89 Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7});
90
91 buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
92 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32});
93
94 buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
95 buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
96
97 buffer.UnmarkRegionAsCpuModified(c, WORD * 32);
98 REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
99}
100
101TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") {
102 RasterizerInterface rasterizer;
103 BufferBase buffer(rasterizer, c, PAGE * 2);
104 REQUIRE(rasterizer.Count() == 0);
105 buffer.UnmarkRegionAsCpuModified(c, PAGE);
106 REQUIRE(rasterizer.Count() == 1);
107 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
108 REQUIRE(rasterizer.Count() == 0);
109 buffer.UnmarkRegionAsCpuModified(c, PAGE);
110 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
111 REQUIRE(rasterizer.Count() == 2);
112 buffer.MarkRegionAsCpuModified(c, PAGE * 2);
113 REQUIRE(rasterizer.Count() == 0);
114}
115
116TEST_CASE("BufferBase: Basic range", "[video_core]") {
117 RasterizerInterface rasterizer;
118 BufferBase buffer(rasterizer, c, WORD);
119 buffer.UnmarkRegionAsCpuModified(c, WORD);
120 buffer.MarkRegionAsCpuModified(c, PAGE);
121 int num = 0;
122 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
123 REQUIRE(offset == 0U);
124 REQUIRE(size == PAGE);
125 ++num;
126 });
127 REQUIRE(num == 1U);
128}
129
130TEST_CASE("BufferBase: Border upload", "[video_core]") {
131 RasterizerInterface rasterizer;
132 BufferBase buffer(rasterizer, c, WORD * 2);
133 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
134 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
135 buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
136 REQUIRE(offset == WORD - PAGE);
137 REQUIRE(size == PAGE * 2);
138 });
139}
140
141TEST_CASE("BufferBase: Border upload range", "[video_core]") {
142 RasterizerInterface rasterizer;
143 BufferBase buffer(rasterizer, c, WORD * 2);
144 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
145 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
146 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
147 REQUIRE(offset == WORD - PAGE);
148 REQUIRE(size == PAGE * 2);
149 });
150 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
151 buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
152 REQUIRE(offset == WORD - PAGE);
153 REQUIRE(size == PAGE);
154 });
155 buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
156 REQUIRE(offset == WORD);
157 REQUIRE(size == PAGE);
158 });
159}
160
161TEST_CASE("BufferBase: Border upload partial range", "[video_core]") {
162 RasterizerInterface rasterizer;
163 BufferBase buffer(rasterizer, c, WORD * 2);
164 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
165 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
166 buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
167 REQUIRE(offset == WORD - PAGE);
168 REQUIRE(size == PAGE * 2);
169 });
170 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
171 buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
172 REQUIRE(offset == WORD - PAGE);
173 REQUIRE(size == PAGE);
174 });
175 buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
176 REQUIRE(offset == WORD);
177 REQUIRE(size == PAGE);
178 });
179}
180
181TEST_CASE("BufferBase: Partial word uploads", "[video_core]") {
182 RasterizerInterface rasterizer;
183 BufferBase buffer(rasterizer, c, 0x9d000);
184 int num = 0;
185 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
186 REQUIRE(offset == 0U);
187 REQUIRE(size == WORD);
188 ++num;
189 });
190 REQUIRE(num == 1);
191 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
192 REQUIRE(offset == WORD);
193 REQUIRE(size == WORD);
194 ++num;
195 });
196 REQUIRE(num == 2);
197 buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
198 REQUIRE(offset == WORD * 2);
199 REQUIRE(size == PAGE * 0x1d);
200 ++num;
201 });
202 REQUIRE(num == 3);
203}
204
205TEST_CASE("BufferBase: Partial page upload", "[video_core]") {
206 RasterizerInterface rasterizer;
207 BufferBase buffer(rasterizer, c, WORD);
208 buffer.UnmarkRegionAsCpuModified(c, WORD);
209 int num = 0;
210 buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
211 buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
212 buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
213 REQUIRE(offset == PAGE * 2);
214 REQUIRE(size == PAGE);
215 ++num;
216 });
217 REQUIRE(num == 1);
218 buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
219 REQUIRE(offset == PAGE * 9);
220 REQUIRE(size == PAGE);
221 ++num;
222 });
223 REQUIRE(num == 2);
224}
225
226TEST_CASE("BufferBase: Partial page upload with multiple words on the right") {
227 RasterizerInterface rasterizer;
228 BufferBase buffer(rasterizer, c, WORD * 8);
229 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
230 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
231 int num = 0;
232 buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
233 REQUIRE(offset == PAGE * 13);
234 REQUIRE(size == WORD * 7 - PAGE * 3);
235 ++num;
236 });
237 REQUIRE(num == 1);
238 buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
239 REQUIRE(offset == WORD * 7 + PAGE * 10);
240 REQUIRE(size == PAGE * 3);
241 ++num;
242 });
243 REQUIRE(num == 2);
244}
245
246TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") {
247 RasterizerInterface rasterizer;
248 BufferBase buffer(rasterizer, c, WORD * 8);
249 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
250 buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
251 int num = 0;
252 buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
253 REQUIRE(offset == PAGE * 16);
254 REQUIRE(size == WORD * 7 - PAGE * 3);
255 ++num;
256 });
257 REQUIRE(num == 1);
258 buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
259 REQUIRE(offset == PAGE * 13);
260 REQUIRE(size == PAGE * 3);
261 ++num;
262 });
263 REQUIRE(num == 2);
264}
265
266TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") {
267 RasterizerInterface rasterizer;
268 BufferBase buffer(rasterizer, c, WORD * 8);
269 buffer.UnmarkRegionAsCpuModified(c, WORD * 8);
270 buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
271 int num = 0;
272 buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
273 REQUIRE(offset == PAGE * 16);
274 REQUIRE(size == WORD);
275 ++num;
276 });
277 REQUIRE(num == 1);
278 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
279 REQUIRE(offset == PAGE * 13);
280 REQUIRE(size == PAGE * 3);
281 ++num;
282 });
283 REQUIRE(num == 2);
284 buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
285 REQUIRE(offset == WORD + PAGE * 16);
286 REQUIRE(size == PAGE * 73);
287 ++num;
288 });
289 REQUIRE(num == 3);
290}
291
292TEST_CASE("BufferBase: Empty right bits", "[video_core]") {
293 RasterizerInterface rasterizer;
294 BufferBase buffer(rasterizer, c, WORD * 2048);
295 buffer.UnmarkRegionAsCpuModified(c, WORD * 2048);
296 buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
297 buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
298 REQUIRE(offset == WORD - PAGE);
299 REQUIRE(size == PAGE * 2);
300 });
301}
302
303TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") {
304 RasterizerInterface rasterizer;
305 BufferBase buffer(rasterizer, c, WORD);
306 buffer.UnmarkRegionAsCpuModified(c, WORD);
307 buffer.MarkRegionAsCpuModified(c, PAGE);
308 int num = 0;
309 buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
310 buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
311 buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
312 REQUIRE(num == 0);
313 buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
314 REQUIRE(num == 1);
315 buffer.MarkRegionAsCpuModified(c, WORD);
316 REQUIRE(rasterizer.Count() == 0);
317}
318
319TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") {
320 RasterizerInterface rasterizer;
321 BufferBase buffer(rasterizer, c, 0x22000);
322 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
323 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
324 REQUIRE(rasterizer.Count() == 0);
325 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
326 REQUIRE(rasterizer.Count() == 1);
327 REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2));
328 buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2);
329 buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2);
330 REQUIRE(rasterizer.Count() == 2);
331}
332
333TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") {
334 RasterizerInterface rasterizer;
335 BufferBase buffer(rasterizer, c, 0x310720);
336 buffer.UnmarkRegionAsCpuModified(c, 0x310720);
337 REQUIRE(rasterizer.Count(c) == 1);
338 REQUIRE(rasterizer.Count(c + PAGE) == 1);
339 REQUIRE(rasterizer.Count(c + WORD) == 1);
340 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
341}
342
343TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") {
344 RasterizerInterface rasterizer;
345 BufferBase buffer(rasterizer, c, WORD);
346 buffer.UnmarkRegionAsCpuModified(c, WORD);
347 buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
348 buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
349 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
350 static constexpr std::array<u64, 2> offsets{PAGE, PAGE * 3};
351 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
352 REQUIRE(offset == offsets.at(i));
353 REQUIRE(size == sizes.at(i));
354 ++i;
355 });
356}
357
358TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") {
359 RasterizerInterface rasterizer;
360 BufferBase buffer(rasterizer, c, 0x22000);
361 buffer.UnmarkRegionAsCpuModified(c, 0x22000);
362 REQUIRE(rasterizer.Count() == 0x22);
363 buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
364 buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
365 buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
366 static constexpr std::array<u64, 2> offsets{PAGE * 0x1B, PAGE * 0x21};
367 static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
368 REQUIRE(offset == offsets.at(i));
369 REQUIRE(size == sizes.at(i));
370 ++i;
371 });
372}
373
374TEST_CASE("BufferBase: Single page modified range", "[video_core]") {
375 RasterizerInterface rasterizer;
376 BufferBase buffer(rasterizer, c, PAGE);
377 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
378 buffer.UnmarkRegionAsCpuModified(c, PAGE);
379 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
380}
381
382TEST_CASE("BufferBase: Two page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 BufferBase buffer(rasterizer, c, PAGE * 2);
385 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
386 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
387 REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2));
388 buffer.UnmarkRegionAsCpuModified(c, PAGE);
389 REQUIRE(!buffer.IsRegionCpuModified(c, PAGE));
390}
391
392TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") {
393 for (int offset = 0; offset < 4; ++offset) {
394 const VAddr address = c + WORD * offset;
395 RasterizerInterface rasterizer;
396 BufferBase buffer(rasterizer, address, WORD * 4);
397 REQUIRE(buffer.IsRegionCpuModified(address, PAGE));
398 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE));
399 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE));
400
401 buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
402 REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD));
403 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE));
404 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE));
405 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE));
406 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
407 REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
408
409 buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
410 REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
411 }
412}
413
414TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") {
415 RasterizerInterface rasterizer;
416 BufferBase buffer(rasterizer, c, WORD * 16);
417 buffer.UnmarkRegionAsCpuModified(c, WORD * 16);
418 REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16));
419
420 buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
421 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16));
422 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2));
423 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2));
424 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2));
425 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
426 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
427 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
428 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
429 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
430}
431
432TEST_CASE("BufferBase: Out of bounds region query", "[video_core]") {
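// A fresh BufferBase starts fully CPU-modified and clamps queries to its
// bounds: ranges entirely outside report clean, while any range overlapping
// the still-dirty final page reports modified.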
433 RasterizerInterface rasterizer;
434 BufferBase buffer(rasterizer, c, WORD * 16);
435 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE));
436 REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE));
437 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE));
438 REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64));
439 REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64));
440}
441
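// Pages 63 and 64 straddle a tracking-word boundary (64 pages per u64 word),
// so the runs below carry from the top bit of one state word into the bottom
// bit of the next.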
442TEST_CASE("BufferBase: Wrap word regions", "[video_core]") {
443 RasterizerInterface rasterizer;
444 BufferBase buffer(rasterizer, c, WORD * 2);
445 buffer.UnmarkRegionAsCpuModified(c, WORD * 2);
446 buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
447 REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2));
448 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE));
449 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE));
450 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE));
451 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
452 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
453 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
454
455 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
457 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16));
458 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE));
459 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE));
460 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
461 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16));
462}
463
464TEST_CASE("BufferBase: Unaligned page region query", "[video_core]") {
465 RasterizerInterface rasterizer;
466 BufferBase buffer(rasterizer, c, WORD);
467 buffer.UnmarkRegionAsCpuModified(c, WORD);
468 buffer.MarkRegionAsCpuModified(c + 4000, 1000);
469 REQUIRE(buffer.IsRegionCpuModified(c, PAGE));
470 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
471 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000));
472 REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1));
473}
474
475TEST_CASE("BufferBase: Cached write", "[video_core]") {
476 RasterizerInterface rasterizer;
477 BufferBase buffer(rasterizer, c, WORD);
478 buffer.UnmarkRegionAsCpuModified(c, WORD);
479 buffer.CachedCpuWrite(c + PAGE, PAGE);
480 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
481 buffer.FlushCachedWrites();
482 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
483 buffer.MarkRegionAsCpuModified(c, WORD);
484 REQUIRE(rasterizer.Count() == 0);
485}
486
487TEST_CASE("BufferBase: Multiple cached write", "[video_core]") {
488 RasterizerInterface rasterizer;
489 BufferBase buffer(rasterizer, c, WORD);
490 buffer.UnmarkRegionAsCpuModified(c, WORD);
491 buffer.CachedCpuWrite(c + PAGE, PAGE);
492 buffer.CachedCpuWrite(c + PAGE * 3, PAGE);
493 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
494 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
495 buffer.FlushCachedWrites();
496 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
497 REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE));
498 buffer.MarkRegionAsCpuModified(c, WORD);
499 REQUIRE(rasterizer.Count() == 0);
500}
501
502TEST_CASE("BufferBase: Cached write unmarked", "[video_core]") {
503 RasterizerInterface rasterizer;
504 BufferBase buffer(rasterizer, c, WORD);
505 buffer.UnmarkRegionAsCpuModified(c, WORD);
506 buffer.CachedCpuWrite(c + PAGE, PAGE);
507 buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE);
508 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
509 buffer.FlushCachedWrites();
510 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
511 buffer.MarkRegionAsCpuModified(c, WORD);
512 REQUIRE(rasterizer.Count() == 0);
513}
514
515TEST_CASE("BufferBase: Cached write iterated", "[video_core]") {
516 RasterizerInterface rasterizer;
517 BufferBase buffer(rasterizer, c, WORD);
518 buffer.UnmarkRegionAsCpuModified(c, WORD);
519 buffer.CachedCpuWrite(c + PAGE, PAGE);
520 int num = 0;
521 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
522 REQUIRE(num == 0);
523 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
524 buffer.FlushCachedWrites();
525 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
526 buffer.MarkRegionAsCpuModified(c, WORD);
527 REQUIRE(rasterizer.Count() == 0);
528}
529
530TEST_CASE("BufferBase: Cached write downloads", "[video_core]") {
531 RasterizerInterface rasterizer;
532 BufferBase buffer(rasterizer, c, WORD);
533 buffer.UnmarkRegionAsCpuModified(c, WORD);
534 REQUIRE(rasterizer.Count() == 64);
535 buffer.CachedCpuWrite(c + PAGE, PAGE);
536 REQUIRE(rasterizer.Count() == 63);
537 buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
538 int num = 0;
539 buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites();
545 REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
547 buffer.MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549}
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
new file mode 100644
index 000000000..3981907a2
--- /dev/null
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -0,0 +1,549 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include <memory>
5#include <stdexcept>
6#include <unordered_map>
7
8#include <catch2/catch_test_macros.hpp>
9
10#include "common/alignment.h"
11#include "common/common_types.h"
12#include "video_core/buffer_cache/memory_tracker_base.h"
13
14namespace {
15using Range = std::pair<u64, u64>;
16
17constexpr u64 PAGE = 4096;
18constexpr u64 WORD = 4096 * 64;
19constexpr u64 HIGH_PAGE_BITS = 22;
20constexpr u64 HIGH_PAGE_SIZE = 1ULL << HIGH_PAGE_BITS;
21
22constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
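// Layout note: HIGH_PAGE_SIZE is 4 MiB, which presumably mirrors the
// tracker's upper-level page granularity, and c = 64 MiB leaves headroom so
// the out-of-bound tests can probe addresses below the tracked region.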
23
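// Mock rasterizer that reference-counts cached pages: unmarking a region as
// CPU-modified is expected to add one count per page and re-marking it to
// remove that count. A negative count throws, so these tests double as leak
// checks on the tracker's UpdatePagesCachedCount notifications.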
24class RasterizerInterface {
25public:
26 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
27 const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
28 const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
29 Core::Memory::YUZU_PAGEBITS};
30 for (u64 page = page_start; page < page_end; ++page) {
31 int& value = page_table[page];
32 value += delta;
33 if (value < 0) {
34 throw std::logic_error{"negative page"};
35 }
36 if (value == 0) {
37 page_table.erase(page);
38 }
39 }
40 }
41
42 [[nodiscard]] int Count(VAddr addr) const noexcept {
43 const auto it = page_table.find(addr >> Core::Memory::YUZU_PAGEBITS);
44 return it == page_table.end() ? 0 : it->second;
45 }
46
47 [[nodiscard]] unsigned Count() const noexcept {
48 unsigned count = 0;
49 for (const auto& [index, value] : page_table) {
50 count += value;
51 }
52 return count;
53 }
54
55private:
56 std::unordered_map<u64, int> page_table;
57};
58} // Anonymous namespace
59
60using MemoryTracker = VideoCommon::MemoryTrackerBase<RasterizerInterface>;
61
62TEST_CASE("MemoryTracker: Small region", "[video_core]") {
63 RasterizerInterface rasterizer;
64 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
65 REQUIRE(rasterizer.Count() == 0);
66 memory_track->UnmarkRegionAsCpuModified(c, WORD);
67 REQUIRE(rasterizer.Count() == WORD / PAGE);
68 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{0, 0});
69
70 memory_track->MarkRegionAsCpuModified(c + PAGE, 1);
71 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD) == Range{c + PAGE * 1, c + PAGE * 2});
72}
73
74TEST_CASE("MemoryTracker: Large region", "[video_core]") {
75 RasterizerInterface rasterizer;
76 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
77 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
78 memory_track->MarkRegionAsCpuModified(c + 4096, WORD * 4);
79 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD + PAGE * 2) ==
80 Range{c + PAGE, c + WORD + PAGE * 2});
81 REQUIRE(memory_track->ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) ==
82 Range{c + PAGE * 2, c + PAGE * 8});
83 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 4 + PAGE});
84 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 4, PAGE) ==
85 Range{c + WORD * 4, c + WORD * 4 + PAGE});
86 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) ==
87 Range{c + WORD * 3 + PAGE * 63, c + WORD * 4});
88
89 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE);
90 memory_track->MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
91 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
92 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 9});
93
94 memory_track->UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE);
95 REQUIRE(memory_track->ModifiedCpuRegion(c + WORD * 5, WORD) ==
96 Range{c + WORD * 5 + PAGE * 6, c + WORD * 5 + PAGE * 7});
97
98 memory_track->MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63);
99 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{c + PAGE, c + WORD * 32});
100
101 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE);
102 memory_track->UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE);
103
104 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
105 REQUIRE(memory_track->ModifiedCpuRegion(c, WORD * 32) == Range{0, 0});
106}
107
108TEST_CASE("MemoryTracker: Rasterizer counting", "[video_core]") {
109 RasterizerInterface rasterizer;
110 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
111 REQUIRE(rasterizer.Count() == 0);
112 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
113 REQUIRE(rasterizer.Count() == 1);
114 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
115 REQUIRE(rasterizer.Count() == 0);
116 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
117 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
118 REQUIRE(rasterizer.Count() == 2);
119 memory_track->MarkRegionAsCpuModified(c, PAGE * 2);
120 REQUIRE(rasterizer.Count() == 0);
121}
122
123TEST_CASE("MemoryTracker: Basic range", "[video_core]") {
124 RasterizerInterface rasterizer;
125 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
126 memory_track->UnmarkRegionAsCpuModified(c, WORD);
127 memory_track->MarkRegionAsCpuModified(c, PAGE);
128 int num = 0;
129 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
130 REQUIRE(offset == c);
131 REQUIRE(size == PAGE);
132 ++num;
133 });
134 REQUIRE(num == 1U);
135}
136
137TEST_CASE("MemoryTracker: Border upload", "[video_core]") {
138 RasterizerInterface rasterizer;
139 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
140 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
141 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
142 memory_track->ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) {
143 REQUIRE(offset == c + WORD - PAGE);
144 REQUIRE(size == PAGE * 2);
145 });
146}
147
148TEST_CASE("MemoryTracker: Border upload range", "[video_core]") {
149 RasterizerInterface rasterizer;
150 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
151 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
152 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
153 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) {
154 REQUIRE(offset == c + WORD - PAGE);
155 REQUIRE(size == PAGE * 2);
156 });
157 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
158 memory_track->ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) {
159 REQUIRE(offset == c + WORD - PAGE);
160 REQUIRE(size == PAGE);
161 });
162 memory_track->ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) {
163 REQUIRE(offset == c + WORD);
164 REQUIRE(size == PAGE);
165 });
166}
167
168TEST_CASE("MemoryTracker: Border upload partial range", "[video_core]") {
169 RasterizerInterface rasterizer;
170 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
171 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2);
172 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
173 memory_track->ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) {
174 REQUIRE(offset == c + WORD - PAGE);
175 REQUIRE(size == PAGE * 2);
176 });
177 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
178 memory_track->ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) {
179 REQUIRE(offset == c + WORD - PAGE);
180 REQUIRE(size == PAGE);
181 });
182 memory_track->ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) {
183 REQUIRE(offset == c + WORD);
184 REQUIRE(size == PAGE);
185 });
186}
187
188TEST_CASE("MemoryTracker: Partial word uploads", "[video_core]") {
189 RasterizerInterface rasterizer;
190 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
191 int num = 0;
192 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
193 REQUIRE(offset == c);
194 REQUIRE(size == WORD);
195 ++num;
196 });
197 REQUIRE(num == 1);
198 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) {
199 REQUIRE(offset == c + WORD);
200 REQUIRE(size == WORD);
201 ++num;
202 });
203 REQUIRE(num == 2);
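// Arithmetic for the constants below, given PAGE = 0x1000 and WORD = 0x40000:
// the query [c + 0x79000, c + 0x9D000) starts mid word 1 and ends inside
// word 2; words 0 and 1 were uploaded above, so the remaining dirty
// intersection is [c + WORD * 2, c + 0x9D000), i.e. PAGE * 0x1d.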
204 memory_track->ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) {
205 REQUIRE(offset == c + WORD * 2);
206 REQUIRE(size == PAGE * 0x1d);
207 ++num;
208 });
209 REQUIRE(num == 3);
210}
211
212TEST_CASE("MemoryTracker: Partial page upload", "[video_core]") {
213 RasterizerInterface rasterizer;
214 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
215 memory_track->UnmarkRegionAsCpuModified(c, WORD);
216 int num = 0;
217 memory_track->MarkRegionAsCpuModified(c + PAGE * 2, PAGE);
218 memory_track->MarkRegionAsCpuModified(c + PAGE * 9, PAGE);
219 memory_track->ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) {
220 REQUIRE(offset == c + PAGE * 2);
221 REQUIRE(size == PAGE);
222 ++num;
223 });
224 REQUIRE(num == 1);
225 memory_track->ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) {
226 REQUIRE(offset == c + PAGE * 9);
227 REQUIRE(size == PAGE);
228 ++num;
229 });
230 REQUIRE(num == 2);
231}
232
233TEST_CASE("MemoryTracker: Partial page upload with multiple words on the right", "[video_core]") {
234 RasterizerInterface rasterizer;
235 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
236 memory_track->UnmarkRegionAsCpuModified(c, WORD * 9);
237 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
238 int num = 0;
239 memory_track->ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) {
240 REQUIRE(offset == c + PAGE * 13);
241 REQUIRE(size == WORD * 7 - PAGE * 3);
242 ++num;
243 });
244 REQUIRE(num == 1);
245 memory_track->ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) {
246 REQUIRE(offset == c + WORD * 7 + PAGE * 10);
247 REQUIRE(size == PAGE * 3);
248 ++num;
249 });
250 REQUIRE(num == 2);
251}
252
253TEST_CASE("MemoryTracker: Partial page upload with multiple words on the left", "[video_core]") {
254 RasterizerInterface rasterizer;
255 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
256 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
257 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7);
258 int num = 0;
259 memory_track->ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) {
260 REQUIRE(offset == c + PAGE * 16);
261 REQUIRE(size == WORD * 7 - PAGE * 3);
262 ++num;
263 });
264 REQUIRE(num == 1);
265 memory_track->ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) {
266 REQUIRE(offset == c + PAGE * 13);
267 REQUIRE(size == PAGE * 3);
268 ++num;
269 });
270 REQUIRE(num == 2);
271}
272
273TEST_CASE("MemoryTracker: Partial page upload with multiple words in the middle", "[video_core]") {
274 RasterizerInterface rasterizer;
275 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
276 memory_track->UnmarkRegionAsCpuModified(c, WORD * 8);
277 memory_track->MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 140);
278 int num = 0;
279 memory_track->ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) {
280 REQUIRE(offset == c + PAGE * 16);
281 REQUIRE(size == WORD);
282 ++num;
283 });
284 REQUIRE(num == 1);
285 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) {
286 REQUIRE(offset == c + PAGE * 13);
287 REQUIRE(size == PAGE * 3);
288 ++num;
289 });
290 REQUIRE(num == 2);
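// Page accounting: pages [13, 153) were marked dirty; the passes above
// uploaded [16, 80) and [13, 16), so the full-region pass below should see
// the remainder [80, 153), i.e. offset WORD + PAGE * 16 and 73 pages.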
291 memory_track->ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) {
292 REQUIRE(offset == c + WORD + PAGE * 16);
293 REQUIRE(size == PAGE * 73);
294 ++num;
295 });
296 REQUIRE(num == 3);
297}
298
299TEST_CASE("MemoryTracker: Empty right bits", "[video_core]") {
300 RasterizerInterface rasterizer;
301 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
302 memory_track->UnmarkRegionAsCpuModified(c, WORD * 2048);
303 memory_track->MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2);
304 memory_track->ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) {
305 REQUIRE(offset == c + WORD - PAGE);
306 REQUIRE(size == PAGE * 2);
307 });
308}
309
310TEST_CASE("MemoryTracker: Out of bound ranges 1", "[video_core]") {
311 RasterizerInterface rasterizer;
312 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
313 memory_track->UnmarkRegionAsCpuModified(c - WORD, 3 * WORD);
314 memory_track->MarkRegionAsCpuModified(c, PAGE);
315 REQUIRE(rasterizer.Count() == (3 * WORD - PAGE) / PAGE);
316 int num = 0;
317 memory_track->ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; });
318 memory_track->ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; });
319 memory_track->ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; });
320 REQUIRE(num == 0);
321 memory_track->ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; });
322 REQUIRE(num == 1);
323 memory_track->MarkRegionAsCpuModified(c, WORD);
324 REQUIRE(rasterizer.Count() == 2 * WORD / PAGE);
325}
326
327TEST_CASE("MemoryTracker: Out of bound ranges 2", "[video_core]") {
328 RasterizerInterface rasterizer;
329 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
330 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x22000, PAGE));
331 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x28000, PAGE));
332 REQUIRE(rasterizer.Count() == 2);
333 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100));
334 REQUIRE(rasterizer.Count() == 3);
335 REQUIRE_NOTHROW(memory_track->UnmarkRegionAsCpuModified(c - PAGE, PAGE * 2));
336 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 3, PAGE * 2);
337 memory_track->UnmarkRegionAsCpuModified(c - PAGE * 2, PAGE * 2);
338 REQUIRE(rasterizer.Count() == 7);
339}
340
341TEST_CASE("MemoryTracker: Out of bound ranges 3", "[video_core]") {
342 RasterizerInterface rasterizer;
343 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
344 memory_track->UnmarkRegionAsCpuModified(c, 0x310720);
345 REQUIRE(rasterizer.Count(c) == 1);
346 REQUIRE(rasterizer.Count(c + PAGE) == 1);
347 REQUIRE(rasterizer.Count(c + WORD) == 1);
348 REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1);
349}
350
351TEST_CASE("MemoryTracker: Sparse regions 1", "[video_core]") {
352 RasterizerInterface rasterizer;
353 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
354 memory_track->UnmarkRegionAsCpuModified(c, WORD);
355 memory_track->MarkRegionAsCpuModified(c + PAGE * 1, PAGE);
356 memory_track->MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4);
357 memory_track->ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable {
358 static constexpr std::array<u64, 2> offsets{c + PAGE, c + PAGE * 3};
359 static constexpr std::array<u64, 2> sizes{PAGE, PAGE * 4};
360 REQUIRE(offset == offsets.at(i));
361 REQUIRE(size == sizes.at(i));
362 ++i;
363 });
364}
365
366TEST_CASE("MemoryTracker: Sparse regions 2", "[video_core]") {
367 RasterizerInterface rasterizer;
368 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
369 memory_track->UnmarkRegionAsCpuModified(c, PAGE * 0x23);
370 REQUIRE(rasterizer.Count() == 0x23);
371 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE);
372 memory_track->MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE);
373 memory_track->ForEachUploadRange(c, PAGE * 0x23, [i = 0](u64 offset, u64 size) mutable {
374 static constexpr std::array<u64, 2> offsets{c + PAGE * 0x1B, c + PAGE * 0x21};
375 static constexpr std::array<u64, 2> sizes{PAGE, PAGE};
376 REQUIRE(offset == offsets.at(i));
377 REQUIRE(size == sizes.at(i));
378 ++i;
379 });
380}
381
382TEST_CASE("MemoryTracker: Single page modified range", "[video_core]") {
383 RasterizerInterface rasterizer;
384 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
385 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
386 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
387 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
388}
389
390TEST_CASE("MemoryTracker: Two page modified range", "[video_core]") {
391 RasterizerInterface rasterizer;
392 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
393 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
394 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
395 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE * 2));
396 memory_track->UnmarkRegionAsCpuModified(c, PAGE);
397 REQUIRE(!memory_track->IsRegionCpuModified(c, PAGE));
398}
399
400TEST_CASE("MemoryTracker: Multi word modified ranges", "[video_core]") {
401 for (int offset = 0; offset < 4; ++offset) {
402 const VAddr address = c + WORD * offset;
403 RasterizerInterface rasterizer;
404 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
405 REQUIRE(memory_track->IsRegionCpuModified(address, PAGE));
406 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 48, PAGE));
407 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 56, PAGE));
408
409 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE);
410 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE, WORD));
411 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE));
412 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE));
413 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 33, PAGE));
414 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 31, PAGE * 2));
415 REQUIRE(memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
416
417 memory_track->UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE);
418 REQUIRE(!memory_track->IsRegionCpuModified(address + PAGE * 32, PAGE * 2));
419 }
420}
421
422TEST_CASE("MemoryTracker: Single page in large region", "[video_core]") {
423 RasterizerInterface rasterizer;
424 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
425 memory_track->UnmarkRegionAsCpuModified(c, WORD * 16);
426 REQUIRE(!memory_track->IsRegionCpuModified(c, WORD * 16));
427
428 memory_track->MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE);
429 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 16));
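// Unlike the matching BufferBase test, the [WORD * 10, WORD * 12) query below
// must be clean: MemoryTracker appears to test at page granularity rather
// than rounding the query up to whole words, keeping the dirty page at
// WORD * 12 + PAGE * 8 out of range.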
430 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 10, WORD * 2));
431 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 11, WORD * 2));
432 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12, WORD * 2));
433 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8));
434 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8));
435 REQUIRE(!memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE));
436 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2));
437 REQUIRE(memory_track->IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2));
438}
439
440TEST_CASE("MemoryTracker: Wrap word regions", "[video_core]") {
441 RasterizerInterface rasterizer;
442 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
443 memory_track->UnmarkRegionAsCpuModified(c, WORD * 32);
444 memory_track->MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2);
445 REQUIRE(memory_track->IsRegionCpuModified(c, WORD * 2));
446 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 62, PAGE));
447 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE));
448 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 64, PAGE));
449 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 2));
450 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 63, PAGE * 8));
451 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 60, PAGE * 8));
452
453 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
454 memory_track->MarkRegionAsCpuModified(c + PAGE * 127, PAGE);
455 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, WORD * 16));
456 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 127, PAGE));
457 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE));
458 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 126, PAGE * 2));
459 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 128, WORD * 16));
460}
461
462TEST_CASE("MemoryTracker: Unaligned page region query", "[video_core]") {
463 RasterizerInterface rasterizer;
464 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
465 memory_track->UnmarkRegionAsCpuModified(c, WORD);
466 memory_track->MarkRegionAsCpuModified(c + 4000, 1000);
467 REQUIRE(memory_track->IsRegionCpuModified(c, PAGE));
468 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
469 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1000));
470 REQUIRE(memory_track->IsRegionCpuModified(c + 4000, 1));
471}
472
473TEST_CASE("MemoryTracker: Cached write", "[video_core]") {
474 RasterizerInterface rasterizer;
475 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
476 memory_track->UnmarkRegionAsCpuModified(c, WORD);
477 memory_track->CachedCpuWrite(c + PAGE, PAGE);
478 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
479 memory_track->FlushCachedWrites();
480 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
481 memory_track->MarkRegionAsCpuModified(c, WORD);
482 REQUIRE(rasterizer.Count() == 0);
483}
484
485TEST_CASE("MemoryTracker: Multiple cached write", "[video_core]") {
486 RasterizerInterface rasterizer;
487 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
488 memory_track->UnmarkRegionAsCpuModified(c, WORD);
489 memory_track->CachedCpuWrite(c + PAGE, PAGE);
490 memory_track->CachedCpuWrite(c + PAGE * 3, PAGE);
491 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
492 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
493 memory_track->FlushCachedWrites();
494 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
495 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE * 3, PAGE));
496 memory_track->MarkRegionAsCpuModified(c, WORD);
497 REQUIRE(rasterizer.Count() == 0);
498}
499
500TEST_CASE("MemoryTracker: Cached write unmarked", "[video_core]") {
501 RasterizerInterface rasterizer;
502 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
503 memory_track->UnmarkRegionAsCpuModified(c, WORD);
504 memory_track->CachedCpuWrite(c + PAGE, PAGE);
505 memory_track->UnmarkRegionAsCpuModified(c + PAGE, PAGE);
506 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
507 memory_track->FlushCachedWrites();
508 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
509 memory_track->MarkRegionAsCpuModified(c, WORD);
510 REQUIRE(rasterizer.Count() == 0);
511}
512
513TEST_CASE("MemoryTracker: Cached write iterated", "[video_core]") {
514 RasterizerInterface rasterizer;
515 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
516 memory_track->UnmarkRegionAsCpuModified(c, WORD);
517 memory_track->CachedCpuWrite(c + PAGE, PAGE);
518 int num = 0;
519 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
520 REQUIRE(num == 0);
521 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
522 memory_track->FlushCachedWrites();
523 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
524 memory_track->MarkRegionAsCpuModified(c, WORD);
525 REQUIRE(rasterizer.Count() == 0);
526}
527
528TEST_CASE("MemoryTracker: Cached write downloads", "[video_core]") {
529 RasterizerInterface rasterizer;
530 std::unique_ptr<MemoryTracker> memory_track(std::make_unique<MemoryTracker>(rasterizer));
531 memory_track->UnmarkRegionAsCpuModified(c, WORD);
532 REQUIRE(rasterizer.Count() == 64);
533 memory_track->CachedCpuWrite(c + PAGE, PAGE);
534 REQUIRE(rasterizer.Count() == 63);
535 memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
536 int num = 0;
537 memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
538 REQUIRE(num == 1);
539 num = 0;
540 memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0);
542 REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
544 memory_track->FlushCachedWrites();
545 REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
546 REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
547 memory_track->MarkRegionAsCpuModified(c, WORD);
548 REQUIRE(rasterizer.Count() == 0);
549} \ No newline at end of file
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e904573d7..a0009a36f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -11,8 +11,11 @@ endif()
11 11
12add_library(video_core STATIC 12add_library(video_core STATIC
13 buffer_cache/buffer_base.h 13 buffer_cache/buffer_base.h
14 buffer_cache/buffer_cache_base.h
14 buffer_cache/buffer_cache.cpp 15 buffer_cache/buffer_cache.cpp
15 buffer_cache/buffer_cache.h 16 buffer_cache/buffer_cache.h
17 buffer_cache/memory_tracker_base.h
18 buffer_cache/word_manager.h
16 cache_types.h 19 cache_types.h
17 cdma_pusher.cpp 20 cdma_pusher.cpp
18 cdma_pusher.h 21 cdma_pusher.h
@@ -104,6 +107,7 @@ add_library(video_core STATIC
104 renderer_null/renderer_null.h 107 renderer_null/renderer_null.h
105 renderer_opengl/blit_image.cpp 108 renderer_opengl/blit_image.cpp
106 renderer_opengl/blit_image.h 109 renderer_opengl/blit_image.h
110 renderer_opengl/gl_buffer_cache_base.cpp
107 renderer_opengl/gl_buffer_cache.cpp 111 renderer_opengl/gl_buffer_cache.cpp
108 renderer_opengl/gl_buffer_cache.h 112 renderer_opengl/gl_buffer_cache.h
109 renderer_opengl/gl_compute_pipeline.cpp 113 renderer_opengl/gl_compute_pipeline.cpp
@@ -154,6 +158,7 @@ add_library(video_core STATIC
154 renderer_vulkan/renderer_vulkan.cpp 158 renderer_vulkan/renderer_vulkan.cpp
155 renderer_vulkan/vk_blit_screen.cpp 159 renderer_vulkan/vk_blit_screen.cpp
156 renderer_vulkan/vk_blit_screen.h 160 renderer_vulkan/vk_blit_screen.h
161 renderer_vulkan/vk_buffer_cache_base.cpp
157 renderer_vulkan/vk_buffer_cache.cpp 162 renderer_vulkan/vk_buffer_cache.cpp
158 renderer_vulkan/vk_buffer_cache.h 163 renderer_vulkan/vk_buffer_cache.h
159 renderer_vulkan/vk_command_pool.cpp 164 renderer_vulkan/vk_command_pool.cpp
@@ -174,6 +179,8 @@ add_library(video_core STATIC
174 renderer_vulkan/vk_master_semaphore.h 179 renderer_vulkan/vk_master_semaphore.h
175 renderer_vulkan/vk_pipeline_cache.cpp 180 renderer_vulkan/vk_pipeline_cache.cpp
176 renderer_vulkan/vk_pipeline_cache.h 181 renderer_vulkan/vk_pipeline_cache.h
182 renderer_vulkan/vk_present_manager.cpp
183 renderer_vulkan/vk_present_manager.h
177 renderer_vulkan/vk_query_cache.cpp 184 renderer_vulkan/vk_query_cache.cpp
178 renderer_vulkan/vk_query_cache.h 185 renderer_vulkan/vk_query_cache.h
179 renderer_vulkan/vk_rasterizer.cpp 186 renderer_vulkan/vk_rasterizer.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 1b4d63616..9cbd95c4b 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
@@ -11,9 +11,7 @@
11#include "common/alignment.h" 11#include "common/alignment.h"
12#include "common/common_funcs.h" 12#include "common/common_funcs.h"
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "common/div_ceil.h" 14#include "video_core/buffer_cache/word_manager.h"
15#include "common/settings.h"
16#include "core/memory.h"
17 15
18namespace VideoCommon { 16namespace VideoCommon {
19 17
@@ -36,116 +34,12 @@ struct NullBufferParams {};
36 */ 34 */
37template <class RasterizerInterface> 35template <class RasterizerInterface>
38class BufferBase { 36class BufferBase {
39 static constexpr u64 PAGES_PER_WORD = 64;
40 static constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
41 static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
42
43 /// Bits tracking modified pages, tightly packed, with a small-vector optimization
44 union WordsArray {
45 /// Returns the pointer to the words state
46 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
47 return is_short ? &stack : heap;
48 }
49
50 /// Returns the pointer to the words state
51 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
52 return is_short ? &stack : heap;
53 }
54
55 u64 stack = 0; ///< Small buffers storage
56 u64* heap; ///< Not-small buffers pointer to the storage
57 };
58
59 struct Words {
60 explicit Words() = default;
61 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
62 if (IsShort()) {
63 cpu.stack = ~u64{0};
64 gpu.stack = 0;
65 cached_cpu.stack = 0;
66 untracked.stack = ~u64{0};
67 } else {
68 // Share allocation between CPU and GPU pages and set their default values
69 const size_t num_words = NumWords();
70 u64* const alloc = new u64[num_words * 4];
71 cpu.heap = alloc;
72 gpu.heap = alloc + num_words;
73 cached_cpu.heap = alloc + num_words * 2;
74 untracked.heap = alloc + num_words * 3;
75 std::fill_n(cpu.heap, num_words, ~u64{0});
76 std::fill_n(gpu.heap, num_words, 0);
77 std::fill_n(cached_cpu.heap, num_words, 0);
78 std::fill_n(untracked.heap, num_words, ~u64{0});
79 }
80 // Clean up trailing bits
81 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
82 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
83 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
84 const u64 last_word = (~u64{0} << shift) >> shift;
85 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
86 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
87 }
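// Tail-masking sketch for the constructor above: with a 3-page buffer,
// last_local_page = 3 and shift = 61, so last_word = 0b111 -- pages past the
// end of the buffer stay permanently clear and never report as modified.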
88
89 ~Words() {
90 Release();
91 }
92
93 Words& operator=(Words&& rhs) noexcept {
94 Release();
95 size_bytes = rhs.size_bytes;
96 cpu = rhs.cpu;
97 gpu = rhs.gpu;
98 cached_cpu = rhs.cached_cpu;
99 untracked = rhs.untracked;
100 rhs.cpu.heap = nullptr;
101 return *this;
102 }
103
104 Words(Words&& rhs) noexcept
105 : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu},
106 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
107 rhs.cpu.heap = nullptr;
108 }
109
110 Words& operator=(const Words&) = delete;
111 Words(const Words&) = delete;
112
113 /// Returns true when the buffer fits in the small vector optimization
114 [[nodiscard]] bool IsShort() const noexcept {
115 return size_bytes <= BYTES_PER_WORD;
116 }
117
118 /// Returns the number of words of the buffer
119 [[nodiscard]] size_t NumWords() const noexcept {
120 return Common::DivCeil(size_bytes, BYTES_PER_WORD);
121 }
122
123 /// Release buffer resources
124 void Release() {
125 if (!IsShort()) {
126 // CPU written words is the base for the heap allocation
127 delete[] cpu.heap;
128 }
129 }
130
131 u64 size_bytes = 0;
132 WordsArray cpu;
133 WordsArray gpu;
134 WordsArray cached_cpu;
135 WordsArray untracked;
136 };
137
138 enum class Type {
139 CPU,
140 GPU,
141 CachedCPU,
142 Untracked,
143 };
144
145public: 37public:
146 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) 38 static constexpr u64 BASE_PAGE_BITS = 16;
147 : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, 39 static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
148 words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} 40
41 explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes_)
42 : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
149 43
150 explicit BufferBase(NullBufferParams) {} 44 explicit BufferBase(NullBufferParams) {}
151 45
@@ -155,100 +49,6 @@ public:
155 BufferBase& operator=(BufferBase&&) = default; 49 BufferBase& operator=(BufferBase&&) = default;
156 BufferBase(BufferBase&&) = default; 50 BufferBase(BufferBase&&) = default;
157 51
158 /// Returns the inclusive CPU modified range in a begin end pair
159 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
160 u64 query_size) const noexcept {
161 const u64 offset = query_cpu_addr - cpu_addr;
162 return ModifiedRegion<Type::CPU>(offset, query_size);
163 }
164
165 /// Returns the inclusive GPU modified range in a begin end pair
166 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
167 u64 query_size) const noexcept {
168 const u64 offset = query_cpu_addr - cpu_addr;
169 return ModifiedRegion<Type::GPU>(offset, query_size);
170 }
171
172 /// Returns true if a region has been modified from the CPU
173 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
174 const u64 offset = query_cpu_addr - cpu_addr;
175 return IsRegionModified<Type::CPU>(offset, query_size);
176 }
177
178 /// Returns true if a region has been modified from the GPU
179 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept {
180 const u64 offset = query_cpu_addr - cpu_addr;
181 return IsRegionModified<Type::GPU>(offset, query_size);
182 }
183
184 /// Mark region as CPU modified, notifying the rasterizer about this change
185 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
186 ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size);
187 }
188
189 /// Unmark region as CPU modified, notifying the rasterizer about this change
190 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) {
191 ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size);
192 }
193
194 /// Mark region as modified from the host GPU
195 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
196 ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size);
197 }
198
199 /// Unmark region as modified from the host GPU
200 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept {
201 ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size);
202 }
203
204 /// Mark region as modified from the CPU,
205 /// but don't report it as CPU modified until FlushCachedWrites is called.
206 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) {
207 flags |= BufferFlagBits::CachedWrites;
208 ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size);
209 }
210
211 /// Flushes cached CPU writes, and notify the rasterizer about the deltas
212 void FlushCachedWrites() noexcept {
213 flags &= ~BufferFlagBits::CachedWrites;
214 const u64 num_words = NumWords();
215 u64* const cached_words = Array<Type::CachedCPU>();
216 u64* const untracked_words = Array<Type::Untracked>();
217 u64* const cpu_words = Array<Type::CPU>();
218 for (u64 word_index = 0; word_index < num_words; ++word_index) {
219 const u64 cached_bits = cached_words[word_index];
220 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
221 untracked_words[word_index] |= cached_bits;
222 cpu_words[word_index] |= cached_bits;
223 if (!Settings::values.use_pessimistic_flushes) {
224 cached_words[word_index] = 0;
225 }
226 }
227 }
228
229 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
230 template <typename Func>
231 void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
232 ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
233 }
234
235 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
236 template <typename Func>
237 void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
238 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
239 }
240
241 template <typename Func>
242 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 size, Func&& func) {
243 ForEachModifiedRange<Type::GPU>(query_cpu_range, size, true, func);
244 }
245
246 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
247 template <typename Func>
248 void ForEachDownloadRange(Func&& func) {
249 ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
250 }
251
252 /// Mark buffer as picked 52 /// Mark buffer as picked
253 void Pick() noexcept { 53 void Pick() noexcept {
254 flags |= BufferFlagBits::Picked; 54 flags |= BufferFlagBits::Picked;
@@ -295,11 +95,6 @@ public:
295 return static_cast<u32>(other_cpu_addr - cpu_addr); 95 return static_cast<u32>(other_cpu_addr - cpu_addr);
296 } 96 }
297 97
298 /// Returns the size in bytes of the buffer
299 [[nodiscard]] u64 SizeBytes() const noexcept {
300 return words.size_bytes;
301 }
302
303 size_t getLRUID() const noexcept { 98 size_t getLRUID() const noexcept {
304 return lru_id; 99 return lru_id;
305 } 100 }
@@ -308,305 +103,16 @@ public:
308 lru_id = lru_id_; 103 lru_id = lru_id_;
309 } 104 }
310 105
311private: 106 size_t SizeBytes() const {
312 template <Type type> 107 return size_bytes;
313 u64* Array() noexcept {
314 if constexpr (type == Type::CPU) {
315 return words.cpu.Pointer(IsShort());
316 } else if constexpr (type == Type::GPU) {
317 return words.gpu.Pointer(IsShort());
318 } else if constexpr (type == Type::CachedCPU) {
319 return words.cached_cpu.Pointer(IsShort());
320 } else if constexpr (type == Type::Untracked) {
321 return words.untracked.Pointer(IsShort());
322 }
323 }
324
325 template <Type type>
326 const u64* Array() const noexcept {
327 if constexpr (type == Type::CPU) {
328 return words.cpu.Pointer(IsShort());
329 } else if constexpr (type == Type::GPU) {
330 return words.gpu.Pointer(IsShort());
331 } else if constexpr (type == Type::CachedCPU) {
332 return words.cached_cpu.Pointer(IsShort());
333 } else if constexpr (type == Type::Untracked) {
334 return words.untracked.Pointer(IsShort());
335 }
336 }
337
338 /**
339 * Change the state of a range of pages
340 *
341 * @param dirty_addr Base address to mark or unmark as modified
342 * @param size Size in bytes to mark or unmark as modified
343 */
344 template <Type type, bool enable>
345 void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) {
346 const s64 difference = dirty_addr - cpu_addr;
347 const u64 offset = std::max<s64>(difference, 0);
348 size += std::min<s64>(difference, 0);
349 if (offset >= SizeBytes() || size < 0) {
350 return;
351 }
352 u64* const untracked_words = Array<Type::Untracked>();
353 u64* const state_words = Array<type>();
354 const u64 offset_end = std::min(offset + size, SizeBytes());
355 const u64 begin_page_index = offset / BYTES_PER_PAGE;
356 const u64 begin_word_index = begin_page_index / PAGES_PER_WORD;
357 const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE);
358 const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD);
359 u64 page_index = begin_page_index % PAGES_PER_WORD;
360 u64 word_index = begin_word_index;
361 while (word_index < end_word_index) {
362 const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD;
363 const u64 left_offset =
364 std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD;
365 const u64 right_offset = page_index;
366 u64 bits = ~u64{0};
367 bits = (bits >> right_offset) << right_offset;
368 bits = (bits << left_offset) >> left_offset;
369 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
370 NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits);
371 }
372 if constexpr (enable) {
373 state_words[word_index] |= bits;
374 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
375 untracked_words[word_index] |= bits;
376 }
377 } else {
378 state_words[word_index] &= ~bits;
379 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
380 untracked_words[word_index] &= ~bits;
381 }
382 }
383 page_index = 0;
384 ++word_index;
385 }
386 }
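// Mask construction sketch: for pages [2, 5) of one word, right_offset = 2
// and left_offset = 59; ((~0 >> 2) << 2) keeps bits 2..63 and the follow-up
// ((bits << 59) >> 59) keeps bits 0..4, leaving 0b11100 -- exactly pages
// 2, 3 and 4.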
387
388 /**
389 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
390 *
391 * @param word_index Index to the word to notify to the rasterizer
392 * @param current_bits Current state of the word
393 * @param new_bits New state of the word
394 *
395 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
396 */
397 template <bool add_to_rasterizer>
398 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
399 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
400 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
401 while (changed_bits != 0) {
402 const int empty_bits = std::countr_zero(changed_bits);
403 addr += empty_bits * BYTES_PER_PAGE;
404 changed_bits >>= empty_bits;
405
406 const u32 continuous_bits = std::countr_one(changed_bits);
407 const u64 size = continuous_bits * BYTES_PER_PAGE;
408 const VAddr begin_addr = addr;
409 addr += size;
410 changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0;
411 rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1);
412 }
413 }
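// Scan sketch: with changed_bits = 0b0110 the loop first skips one clear page
// (countr_zero = 1), then reports the run of two set pages (countr_one = 2)
// through a single UpdatePagesCachedCount call, batching notifications per
// contiguous run rather than per page.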
414
415 /**
416 * Loop over each page in the given range, turn off those bits and notify the rasterizer if
417 * needed. Call the given function on each turned off range.
418 *
419 * @param query_cpu_range Base CPU address to loop over
420 * @param size Size in bytes of the CPU range to loop over
421 * @param func Function to call for each turned off region
422 */
423 template <Type type, typename Func>
424 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
425 static_assert(type != Type::Untracked);
426
427 const s64 difference = query_cpu_range - cpu_addr;
428 const u64 query_begin = std::max<s64>(difference, 0);
429 size += std::min<s64>(difference, 0);
430 if (query_begin >= SizeBytes() || size < 0) {
431 return;
432 }
433 u64* const untracked_words = Array<Type::Untracked>();
434 u64* const state_words = Array<type>();
435 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
436 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
437 u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD);
438
439 const auto modified = [](u64 word) { return word != 0; };
440 const auto first_modified_word = std::find_if(words_begin, words_end, modified);
441 if (first_modified_word == words_end) {
442 // Exit early when the buffer is not modified
443 return;
444 }
445 const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified);
446
447 const u64 word_index_begin = std::distance(state_words, first_modified_word);
448 const u64 word_index_end = std::distance(state_words, last_modified_word);
449
450 const unsigned local_page_begin = std::countr_zero(*first_modified_word);
451 const unsigned local_page_end =
452 static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]);
453 const u64 word_page_begin = word_index_begin * PAGES_PER_WORD;
454 const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD;
455 const u64 query_page_begin = query_begin / BYTES_PER_PAGE;
456 const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE);
457 const u64 page_index_begin = std::max(word_page_begin + local_page_begin, query_page_begin);
458 const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end);
459 const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD;
460 const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1;
461
462 u64 page_begin = first_word_page_begin;
463 u64 current_base = 0;
464 u64 current_size = 0;
465 bool on_going = false;
466 for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) {
467 const bool is_last_word = word_index + 1 == word_index_end;
468 const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD;
469 const u64 right_offset = page_begin;
470 const u64 left_offset = PAGES_PER_WORD - page_end;
471 u64 bits = ~u64{0};
472 bits = (bits >> right_offset) << right_offset;
473 bits = (bits << left_offset) >> left_offset;
474
475 const u64 current_word = state_words[word_index] & bits;
476 if (clear) {
477 state_words[word_index] &= ~bits;
478 }
479
480 if constexpr (type == Type::CPU) {
481 const u64 current_bits = untracked_words[word_index] & bits;
482 untracked_words[word_index] &= ~bits;
483 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
484 }
485 // Exclude CPU modified pages when visiting GPU pages
486 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
487 u64 page = page_begin;
488 page_begin = 0;
489
490 while (page < page_end) {
491 const int empty_bits = std::countr_zero(word >> page);
492 if (on_going && empty_bits != 0) {
493 InvokeModifiedRange(func, current_size, current_base);
494 current_size = 0;
495 on_going = false;
496 }
497 if (empty_bits == PAGES_PER_WORD) {
498 break;
499 }
500 page += empty_bits;
501
502 const int continuous_bits = std::countr_one(word >> page);
503 if (!on_going && continuous_bits != 0) {
504 current_base = word_index * PAGES_PER_WORD + page;
505 on_going = true;
506 }
507 current_size += continuous_bits;
508 page += continuous_bits;
509 }
510 }
511 if (on_going && current_size > 0) {
512 InvokeModifiedRange(func, current_size, current_base);
513 }
514 }
515
516 template <typename Func>
517 void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) {
518 const u64 current_size_bytes = current_size * BYTES_PER_PAGE;
519 const u64 offset_begin = current_base * BYTES_PER_PAGE;
520 const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes());
521 func(offset_begin, offset_end - offset_begin);
522 } 108 }
523 109
524 /** 110private:
525 * Returns true when a region has been modified
526 *
527 * @param offset Offset in bytes from the start of the buffer
528 * @param size Size in bytes of the region to query for modifications
529 */
530 template <Type type>
531 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
532 static_assert(type != Type::Untracked);
533
534 const u64* const untracked_words = Array<Type::Untracked>();
535 const u64* const state_words = Array<type>();
536 const u64 num_query_words = size / BYTES_PER_WORD + 1;
537 const u64 word_begin = offset / BYTES_PER_WORD;
538 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
539 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
540 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
541 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
542 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
543 const u64 word = state_words[word_index] & ~off_word;
544 if (word == 0) {
545 continue;
546 }
547 const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit);
548 const u64 local_page_end = page_end % PAGES_PER_WORD;
549 const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD;
550 if (((word >> page_index) << page_index) << page_end_shift != 0) {
551 return true;
552 }
553 }
554 return false;
555 }
556
557 /**
558 * Returns a begin end pair with the inclusive modified region
559 *
560 * @param offset Offset in bytes from the start of the buffer
561 * @param size Size in bytes of the region to query for modifications
562 */
563 template <Type type>
564 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
565 static_assert(type != Type::Untracked);
566
567 const u64* const untracked_words = Array<Type::Untracked>();
568 const u64* const state_words = Array<type>();
569 const u64 num_query_words = size / BYTES_PER_WORD + 1;
570 const u64 word_begin = offset / BYTES_PER_WORD;
571 const u64 word_end = std::min<u64>(word_begin + num_query_words, NumWords());
572 const u64 page_base = offset / BYTES_PER_PAGE;
573 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
574 u64 begin = std::numeric_limits<u64>::max();
575 u64 end = 0;
576 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
577 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
578 const u64 word = state_words[word_index] & ~off_word;
579 if (word == 0) {
580 continue;
581 }
582 const u64 local_page_begin = std::countr_zero(word);
583 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
584 const u64 page_index = word_index * PAGES_PER_WORD;
585 const u64 page_begin = std::max(page_index + local_page_begin, page_base);
586 const u64 page_end = std::min(page_index + local_page_end, page_limit);
587 begin = std::min(begin, page_begin);
588 end = std::max(end, page_end);
589 }
590 static constexpr std::pair<u64, u64> EMPTY{0, 0};
591 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
592 }
593
594 /// Returns the number of words of the buffer
595 [[nodiscard]] size_t NumWords() const noexcept {
596 return words.NumWords();
597 }
598
599 /// Returns true when the buffer fits in the small vector optimization
600 [[nodiscard]] bool IsShort() const noexcept {
601 return words.IsShort();
602 }
603
604 RasterizerInterface* rasterizer = nullptr;
605 VAddr cpu_addr = 0; 111 VAddr cpu_addr = 0;
606 Words words;
607 BufferFlagBits flags{}; 112 BufferFlagBits flags{};
608 int stream_score = 0; 113 int stream_score = 0;
609 size_t lru_id = SIZE_MAX; 114 size_t lru_id = SIZE_MAX;
115 size_t size_bytes = 0;
610}; 116};
611 117
612} // namespace VideoCommon 118} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp
index a16308b60..40db243d2 100644
--- a/src/video_core/buffer_cache/buffer_cache.cpp
+++ b/src/video_core/buffer_cache/buffer_cache.cpp
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#include "common/microprofile.h" 4#include "common/microprofile.h"
5 5
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index abdc593df..e534e1e9c 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1,485 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <algorithm> 6#include <algorithm>
7#include <array>
8#include <memory> 7#include <memory>
9#include <mutex>
10#include <numeric> 8#include <numeric>
11#include <span>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/icl/interval_set.hpp>
16
17#include "common/common_types.h"
18#include "common/div_ceil.h"
19#include "common/literals.h"
20#include "common/lru_cache.h"
21#include "common/microprofile.h"
22#include "common/polyfill_ranges.h"
23#include "common/scratch_buffer.h"
24#include "common/settings.h"
25#include "core/memory.h"
26#include "video_core/buffer_cache/buffer_base.h"
27#include "video_core/control/channel_state_cache.h"
28#include "video_core/delayed_destruction_ring.h"
29#include "video_core/dirty_flags.h"
30#include "video_core/engines/draw_manager.h"
31#include "video_core/engines/kepler_compute.h"
32#include "video_core/engines/maxwell_3d.h"
33#include "video_core/memory_manager.h"
34#include "video_core/rasterizer_interface.h"
35#include "video_core/surface.h"
36#include "video_core/texture_cache/slot_vector.h"
37#include "video_core/texture_cache/types.h"
38 9
39namespace VideoCommon { 10#include "video_core/buffer_cache/buffer_cache_base.h"
40
41MICROPROFILE_DECLARE(GPU_PrepareBuffers);
42MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
43MICROPROFILE_DECLARE(GPU_DownloadMemory);
44
45using BufferId = SlotId;
46
47using VideoCore::Surface::PixelFormat;
48using namespace Common::Literals;
49
50constexpr u32 NUM_VERTEX_BUFFERS = 32;
51constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
52constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
53constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
54constexpr u32 NUM_STORAGE_BUFFERS = 16;
55constexpr u32 NUM_TEXTURE_BUFFERS = 16;
56constexpr u32 NUM_STAGES = 5;
57
58enum class ObtainBufferSynchronize : u32 {
59 NoSynchronize = 0,
60 FullSynchronize = 1,
61 SynchronizeNoDirty = 2,
62};
63
64enum class ObtainBufferOperation : u32 {
65 DoNothing = 0,
66 MarkAsWritten = 1,
67 DiscardWrite = 2,
68 MarkQuery = 3,
69};
70
71using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
72using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
73
74template <typename P>
75class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
76
77 // Page size for caching purposes.
78 // This is unrelated to the CPU page size and it can be changed as it seems optimal.
79 static constexpr u32 YUZU_PAGEBITS = 16;
80 static constexpr u64 YUZU_PAGESIZE = u64{1} << YUZU_PAGEBITS;
81
82 static constexpr bool IS_OPENGL = P::IS_OPENGL;
83 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
84 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
85 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
86 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
87 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
88 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
89 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
90 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
91
92 static constexpr BufferId NULL_BUFFER_ID{0};
93
94 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
95 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
96 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
97
98 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
99
100 using Runtime = typename P::Runtime;
101 using Buffer = typename P::Buffer;
102
103 using IntervalSet = boost::icl::interval_set<VAddr>;
104 using IntervalType = typename IntervalSet::interval_type;
105
106 struct Empty {};
107
108 struct OverlapResult {
109 std::vector<BufferId> ids;
110 VAddr begin;
111 VAddr end;
112 bool has_stream_leap = false;
113 };
114
115 struct Binding {
116 VAddr cpu_addr{};
117 u32 size{};
118 BufferId buffer_id;
119 };
120
121 struct TextureBufferBinding : Binding {
122 PixelFormat format;
123 };
124
125 static constexpr Binding NULL_BINDING{
126 .cpu_addr = 0,
127 .size = 0,
128 .buffer_id = NULL_BUFFER_ID,
129 };
130
131public:
132 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
133
134 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
135 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
136
137 void TickFrame();
138
139 void WriteMemory(VAddr cpu_addr, u64 size);
140
141 void CachedWriteMemory(VAddr cpu_addr, u64 size);
142
143 void DownloadMemory(VAddr cpu_addr, u64 size);
144
145 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
146
147 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
148
149 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
150
151 void UpdateGraphicsBuffers(bool is_indexed);
152
153 void UpdateComputeBuffers();
154
155 void BindHostGeometryBuffers(bool is_indexed);
156
157 void BindHostStageBuffers(size_t stage);
158
159 void BindHostComputeBuffers();
160
161 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
162 const UniformBufferSizes* sizes);
163
164 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
165
166 void UnbindGraphicsStorageBuffers(size_t stage);
167
168 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
169 bool is_written);
170
171 void UnbindGraphicsTextureBuffers(size_t stage);
172
173 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
174 PixelFormat format, bool is_written, bool is_image);
175
176 void UnbindComputeStorageBuffers();
177
178 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
179 bool is_written);
180
181 void UnbindComputeTextureBuffers();
182
183 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
184 bool is_written, bool is_image);
185
186 void FlushCachedWrites();
187
188 /// Return true when there are uncommitted buffers to be downloaded
189 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
190
191 void AccumulateFlushes();
192
193 /// Return true when the caller should wait for async downloads
194 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
195
196 /// Commit asynchronous downloads
197 void CommitAsyncFlushes();
198 void CommitAsyncFlushesHigh();
199
200 /// Pop asynchronous downloads
201 void PopAsyncFlushes();
202
203 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
204
205 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
206
207 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
208 ObtainBufferSynchronize sync_info,
209 ObtainBufferOperation post_op);
210
211 /// Return true when a CPU region is modified from the GPU
212 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
213
214 /// Return true when a region is registered on the cache
215 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
216
217 /// Return true when a CPU region is modified from the CPU
218 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
219
220 void SetDrawIndirect(
221 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
222 current_draw_indirect = current_draw_indirect_;
223 }
224
225 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
226
227 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
228
229 std::recursive_mutex mutex;
230 Runtime& runtime;
231
232private:
233 template <typename Func>
234 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
235 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
236 const int disabled_bits = std::countr_zero(enabled_mask);
237 index += disabled_bits;
238 enabled_mask >>= disabled_bits;
239 func(index);
240 }
241 }
242
243 template <typename Func>
244 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
245 const u64 page_end = Common::DivCeil(cpu_addr + size, YUZU_PAGESIZE);
246 for (u64 page = cpu_addr >> YUZU_PAGEBITS; page < page_end;) {
247 const BufferId buffer_id = page_table[page];
248 if (!buffer_id) {
249 ++page;
250 continue;
251 }
252 Buffer& buffer = slot_buffers[buffer_id];
253 func(buffer_id, buffer);
254
255 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
256 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
257 }
258 }
259
260 template <typename Func>
261 void ForEachWrittenRange(VAddr cpu_addr, u64 size, Func&& func) {
262 const VAddr start_address = cpu_addr;
263 const VAddr end_address = start_address + size;
264 const VAddr search_base =
265 static_cast<VAddr>(std::min<s64>(0LL, static_cast<s64>(start_address - size)));
266 const IntervalType search_interval{search_base, search_base + 1};
267 auto it = common_ranges.lower_bound(search_interval);
268 if (it == common_ranges.end()) {
269 it = common_ranges.begin();
270 }
271 for (; it != common_ranges.end(); it++) {
272 VAddr inter_addr_end = it->upper();
273 VAddr inter_addr = it->lower();
274 if (inter_addr >= end_address) {
275 break;
276 }
277 if (inter_addr_end <= start_address) {
278 continue;
279 }
280 if (inter_addr_end > end_address) {
281 inter_addr_end = end_address;
282 }
283 if (inter_addr < start_address) {
284 inter_addr = start_address;
285 }
286 func(inter_addr, inter_addr_end);
287 }
288 }
289
290 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
291 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
292 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
293 }
294
295 void RunGarbageCollector();
296
297 void BindHostIndexBuffer();
298
299 void BindHostVertexBuffers();
300
301 void BindHostDrawIndirectBuffers();
302
303 void BindHostGraphicsUniformBuffers(size_t stage);
304
305 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
306
307 void BindHostGraphicsStorageBuffers(size_t stage);
308
309 void BindHostGraphicsTextureBuffers(size_t stage);
310
311 void BindHostTransformFeedbackBuffers();
312
313 void BindHostComputeUniformBuffers();
314
315 void BindHostComputeStorageBuffers();
316
317 void BindHostComputeTextureBuffers();
318
319 void DoUpdateGraphicsBuffers(bool is_indexed);
320
321 void DoUpdateComputeBuffers();
322
323 void UpdateIndexBuffer();
324
325 void UpdateVertexBuffers();
326
327 void UpdateVertexBuffer(u32 index);
328
329 void UpdateDrawIndirect();
330
331 void UpdateUniformBuffers(size_t stage);
332
333 void UpdateStorageBuffers(size_t stage);
334
335 void UpdateTextureBuffers(size_t stage);
336
337 void UpdateTransformFeedbackBuffers();
338
339 void UpdateTransformFeedbackBuffer(u32 index);
340
341 void UpdateComputeUniformBuffers();
342
343 void UpdateComputeStorageBuffers();
344
345 void UpdateComputeTextureBuffers();
346
347 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
348
349 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
350
351 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
352
353 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
354
355 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
356
357 void Register(BufferId buffer_id);
358
359 void Unregister(BufferId buffer_id);
360
361 template <bool insert>
362 void ChangeRegister(BufferId buffer_id);
363
364 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
365
366 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
367
368 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
369
370 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
371 std::span<BufferCopy> copies);
372
373 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
374 std::span<const BufferCopy> copies);
375
376 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
377
378 void DownloadBufferMemory(Buffer& buffer_id);
379
380 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
381
382 void DeleteBuffer(BufferId buffer_id);
383
384 void NotifyBufferDeletion();
385
386 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
387 bool is_written = false) const;
388
389 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
390 PixelFormat format);
391
392 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
393
394 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
395
396 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
397
398 void ClearDownload(IntervalType subtract_interval);
399
400 VideoCore::RasterizerInterface& rasterizer;
401 Core::Memory::Memory& cpu_memory;
402
403 SlotVector<Buffer> slot_buffers;
404 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
405
406 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
407
408 u32 last_index_count = 0;
409
410 Binding index_buffer;
411 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
412 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
413 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
414 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
415 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
416 Binding count_buffer_binding;
417 Binding indirect_buffer_binding;
418
419 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
420 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
421 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
422
423 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
424 u32 enabled_compute_uniform_buffer_mask = 0;
425
426 const UniformBufferSizes* uniform_buffer_sizes{};
427 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
428
429 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
430 std::array<u32, NUM_STAGES> written_storage_buffers{};
431 u32 enabled_compute_storage_buffers = 0;
432 u32 written_compute_storage_buffers = 0;
433
434 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
435 std::array<u32, NUM_STAGES> written_texture_buffers{};
436 std::array<u32, NUM_STAGES> image_texture_buffers{};
437 u32 enabled_compute_texture_buffers = 0;
438 u32 written_compute_texture_buffers = 0;
439 u32 image_compute_texture_buffers = 0;
440
441 std::array<u32, 16> uniform_cache_hits{};
442 std::array<u32, 16> uniform_cache_shots{};
443
444 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
445
446 bool has_deleted_buffers = false;
447 11
448 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> 12namespace VideoCommon {
449 dirty_uniform_buffers{};
450 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
451 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
452 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
453 uniform_buffer_binding_sizes{};
454
455 std::vector<BufferId> cached_write_buffer_ids;
456
457 IntervalSet uncommitted_ranges;
458 IntervalSet common_ranges;
459 std::deque<IntervalSet> committed_ranges;
460
461 Common::ScratchBuffer<u8> immediate_buffer_alloc;
462
463 struct LRUItemParams {
464 using ObjectType = BufferId;
465 using TickType = u64;
466 };
467 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
468 u64 frame_tick = 0;
469 u64 total_used_memory = 0;
470 u64 minimum_memory = 0;
471 u64 critical_memory = 0;
472 13
473 std::array<BufferId, ((1ULL << 39) >> YUZU_PAGEBITS)> page_table; 14using Core::Memory::YUZU_PAGESIZE;
474};
475 15
476template <class P> 16template <class P>
477BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, 17BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
478 Core::Memory::Memory& cpu_memory_, Runtime& runtime_) 18 Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
479 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} { 19 : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_}, memory_tracker{
20 rasterizer} {
480 // Ensure the first slot is used for the null buffer 21 // Ensure the first slot is used for the null buffer
481 void(slot_buffers.insert(runtime, NullBufferParams{})); 22 void(slot_buffers.insert(runtime, NullBufferParams{}));
482 common_ranges.clear(); 23 common_ranges.clear();
24 inline_buffer_id = NULL_BUFFER_ID;
25
26 active_async_buffers = !Settings::IsGPULevelHigh();
483 27
484 if (!runtime.CanReportMemoryUsage()) { 28 if (!runtime.CanReportMemoryUsage()) {
485 minimum_memory = DEFAULT_EXPECTED_MEMORY; 29 minimum_memory = DEFAULT_EXPECTED_MEMORY;
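
Note: the deleted header above carried the ForEachEnabledBit helper (the rewrite presumably re-homes it in buffer_cache_base.h, which this diff does not show). It visits the index of every set bit in a bind mask, hopping over runs of zeros with countr_zero. A self-contained sketch with a worked example:

#include <bit>
#include <cstdint>
#include <cstdio>

template <typename Func>
void ForEachEnabledBit(std::uint32_t enabled_mask, Func&& func) {
    for (std::uint32_t index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
        const int disabled_bits = std::countr_zero(enabled_mask); // skip cleared bits
        index += disabled_bits;
        enabled_mask >>= disabled_bits;
        func(index); // called once per set bit, lowest first
    }
}

int main() {
    ForEachEnabledBit(0b1010'0100u, [](std::uint32_t i) {
        std::printf("%u ", static_cast<unsigned>(i));
    });
    // prints: 2 5 7
}
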
@@ -531,6 +75,8 @@ void BufferCache<P>::TickFrame() {
531 uniform_cache_hits[0] = 0; 75 uniform_cache_hits[0] = 0;
532 uniform_cache_shots[0] = 0; 76 uniform_cache_shots[0] = 0;
533 77
78 active_async_buffers = !Settings::IsGPULevelHigh();
79
534 const bool skip_preferred = hits * 256 < shots * 251; 80 const bool skip_preferred = hits * 256 < shots * 251;
535 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 81 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
536 82
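
Note: the skip heuristic above is integer fixed-point: hits * 256 < shots * 251 compares the uniform-cache hit rate against 251/256 ≈ 98% without floating point. As this hunk reads, a hit rate below that threshold keeps small uploads (up to DEFAULT_SKIP_CACHE_SIZE) bypassing the cache; at or above it, the skip size drops to zero. The predicate written out, as a sketch:

#include <cstdint>

// hits * 256 < shots * 251  <=>  hits / shots < 251 / 256 (~0.981)
bool PreferSkipCache(std::uint64_t hits, std::uint64_t shots) {
    return hits * 256 < shots * 251;
}
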
@@ -543,35 +89,62 @@ void BufferCache<P>::TickFrame() {
543 } 89 }
544 ++frame_tick; 90 ++frame_tick;
545 delayed_destruction_ring.Tick(); 91 delayed_destruction_ring.Tick();
92
93 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
94 for (auto& buffer : async_buffers_death_ring) {
95 runtime.FreeDeferredStagingBuffer(buffer);
96 }
97 async_buffers_death_ring.clear();
98 }
546} 99}
547 100
548template <class P> 101template <class P>
549void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { 102void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) {
550 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 103 memory_tracker.MarkRegionAsCpuModified(cpu_addr, size);
551 buffer.MarkRegionAsCpuModified(cpu_addr, size); 104 if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
552 }); 105 const IntervalType subtract_interval{cpu_addr, cpu_addr + size};
106 ClearDownload(subtract_interval);
107 common_ranges.subtract(subtract_interval);
108 }
553} 109}
554 110
555template <class P> 111template <class P>
556void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { 112void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
557 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 113 memory_tracker.CachedCpuWrite(cpu_addr, size);
558 if (!buffer.HasCachedWrites()) { 114 const IntervalType add_interval{Common::AlignDown(cpu_addr, YUZU_PAGESIZE),
559 cached_write_buffer_ids.push_back(buffer_id); 115 Common::AlignUp(cpu_addr + size, YUZU_PAGESIZE)};
560 } 116 cached_ranges.add(add_interval);
561 buffer.CachedCpuWrite(cpu_addr, size);
562 });
563} 117}
564 118
565template <class P> 119template <class P>
566void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 120void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
121 WaitOnAsyncFlushes(cpu_addr, size);
567 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 122 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
568 DownloadBufferMemory(buffer, cpu_addr, size); 123 DownloadBufferMemory(buffer, cpu_addr, size);
569 }); 124 });
570} 125}
571 126
572template <class P> 127template <class P>
128void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) {
129 bool must_wait = false;
130 ForEachInOverlapCounter(async_downloads, cpu_addr, size,
131 [&](VAddr, VAddr, int) { must_wait = true; });
132 bool must_release = false;
133 ForEachInRangeSet(pending_ranges, cpu_addr, size, [&](VAddr, VAddr) { must_release = true; });
134 if (must_release) {
135 std::function<void()> tmp([]() {});
136 rasterizer.SignalFence(std::move(tmp));
137 }
138 if (must_wait || must_release) {
139 rasterizer.ReleaseFences();
140 }
141}
142
143template <class P>
573void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { 144void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
145 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024);
574 uncommitted_ranges.subtract(subtract_interval); 146 uncommitted_ranges.subtract(subtract_interval);
147 pending_ranges.subtract(subtract_interval);
575 for (auto& interval_set : committed_ranges) { 148 for (auto& interval_set : committed_ranges) {
576 interval_set.subtract(subtract_interval); 149 interval_set.subtract(subtract_interval);
577 } 150 }
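
Note: ClearDownload above leans on boost::icl set algebra: subtracting an interval from an interval_set splits any range it overlaps. A minimal sketch of that behaviour (the addresses are made up):

#include <boost/icl/interval_set.hpp>
#include <cstdint>
#include <iostream>

int main() {
    using IntervalSet = boost::icl::interval_set<std::uint64_t>;
    using Interval = IntervalSet::interval_type;

    IntervalSet ranges;
    ranges.add(Interval::right_open(0x1000, 0x5000));
    ranges.subtract(Interval::right_open(0x2000, 0x3000));
    // ranges now holds [0x1000, 0x2000) and [0x3000, 0x5000)
    for (const auto& iv : ranges) {
        std::cout << std::hex << iv.lower() << ".." << iv.upper() << '\n';
    }
}
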
@@ -591,6 +164,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
591 } 164 }
592 165
593 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; 166 const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount};
167 WaitOnAsyncFlushes(*cpu_src_address, static_cast<u32>(amount));
594 ClearDownload(subtract_interval); 168 ClearDownload(subtract_interval);
595 169
596 BufferId buffer_a; 170 BufferId buffer_a;
@@ -616,10 +190,11 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
616 const VAddr diff = base_address - *cpu_src_address; 190 const VAddr diff = base_address - *cpu_src_address;
617 const VAddr new_base_address = *cpu_dest_address + diff; 191 const VAddr new_base_address = *cpu_dest_address + diff;
618 const IntervalType add_interval{new_base_address, new_base_address + size}; 192 const IntervalType add_interval{new_base_address, new_base_address + size};
619 uncommitted_ranges.add(add_interval);
620 tmp_intervals.push_back(add_interval); 193 tmp_intervals.push_back(add_interval);
194 uncommitted_ranges.add(add_interval);
195 pending_ranges.add(add_interval);
621 }; 196 };
622 ForEachWrittenRange(*cpu_src_address, amount, mirror); 197 ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror);
623 // This subtraction in this order is important for overlapping copies. 198 // This subtraction in this order is important for overlapping copies.
624 common_ranges.subtract(subtract_interval); 199 common_ranges.subtract(subtract_interval);
625 const bool has_new_downloads = tmp_intervals.size() != 0; 200 const bool has_new_downloads = tmp_intervals.size() != 0;
@@ -628,7 +203,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
628 } 203 }
629 runtime.CopyBuffer(dest_buffer, src_buffer, copies); 204 runtime.CopyBuffer(dest_buffer, src_buffer, copies);
630 if (has_new_downloads) { 205 if (has_new_downloads) {
631 dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); 206 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
632 } 207 }
633 std::vector<u8> tmp_buffer(amount); 208 std::vector<u8> tmp_buffer(amount);
634 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 209 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
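
Note: the mirror lambda in this hunk re-bases every GPU-written subrange of the DMA source into the destination's address space by the constant copy displacement. The translation alone, as a sketch (VAddr is assumed to be a u64 here):

#include <cstdint>

using VAddr = std::uint64_t;

// Re-bases an address inside the source copy window onto the destination.
VAddr MirrorAddress(VAddr src_range_base, VAddr cpu_src_address, VAddr cpu_dest_address) {
    const VAddr diff = src_range_base - cpu_src_address; // offset within the copy
    return cpu_dest_address + diff;                      // same offset, destination side
}
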
@@ -866,10 +441,9 @@ void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_add
866 441
867template <class P> 442template <class P>
868void BufferCache<P>::FlushCachedWrites() { 443void BufferCache<P>::FlushCachedWrites() {
869 for (const BufferId buffer_id : cached_write_buffer_ids) {
870 slot_buffers[buffer_id].FlushCachedWrites();
871 }
872 cached_write_buffer_ids.clear(); 444 cached_write_buffer_ids.clear();
445 memory_tracker.FlushCachedWrites();
446 cached_ranges.clear();
873} 447}
874 448
875template <class P> 449template <class P>
@@ -879,10 +453,6 @@ bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
879 453
880template <class P> 454template <class P>
881void BufferCache<P>::AccumulateFlushes() { 455void BufferCache<P>::AccumulateFlushes() {
882 if (Settings::values.gpu_accuracy.GetValue() != Settings::GPUAccuracy::High) {
883 uncommitted_ranges.clear();
884 return;
885 }
886 if (uncommitted_ranges.empty()) { 456 if (uncommitted_ranges.empty()) {
887 return; 457 return;
888 } 458 }
@@ -891,7 +461,11 @@ void BufferCache<P>::AccumulateFlushes() {
891 461
892template <class P> 462template <class P>
893bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { 463bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
894 return false; 464 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
465 return (!async_buffers.empty() && async_buffers.front().has_value());
466 } else {
467 return false;
468 }
895} 469}
896 470
897template <class P> 471template <class P>
@@ -899,12 +473,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
899 AccumulateFlushes(); 473 AccumulateFlushes();
900 474
901 if (committed_ranges.empty()) { 475 if (committed_ranges.empty()) {
476 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
477 if (active_async_buffers) {
478 async_buffers.emplace_back(std::optional<Async_Buffer>{});
479 }
480 }
902 return; 481 return;
903 } 482 }
904 MICROPROFILE_SCOPE(GPU_DownloadMemory); 483 MICROPROFILE_SCOPE(GPU_DownloadMemory);
905 const bool is_accuracy_normal =
906 Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
907 484
485 pending_ranges.clear();
908 auto it = committed_ranges.begin(); 486 auto it = committed_ranges.begin();
909 while (it != committed_ranges.end()) { 487 while (it != committed_ranges.end()) {
910 auto& current_intervals = *it; 488 auto& current_intervals = *it;
@@ -926,11 +504,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
926 const std::size_t size = interval.upper() - interval.lower(); 504 const std::size_t size = interval.upper() - interval.lower();
927 const VAddr cpu_addr = interval.lower(); 505 const VAddr cpu_addr = interval.lower();
928 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { 506 ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
929 buffer.ForEachDownloadRangeAndClear( 507 const VAddr buffer_start = buffer.CpuAddr();
930 cpu_addr, size, [&](u64 range_offset, u64 range_size) { 508 const VAddr buffer_end = buffer_start + buffer.SizeBytes();
931 if (is_accuracy_normal) { 509 const VAddr new_start = std::max(buffer_start, cpu_addr);
932 return; 510 const VAddr new_end = std::min(buffer_end, cpu_addr + size);
933 } 511 memory_tracker.ForEachDownloadRange(
512 new_start, new_end - new_start, false, [&](u64 cpu_addr_out, u64 range_size) {
934 const VAddr buffer_addr = buffer.CpuAddr(); 513 const VAddr buffer_addr = buffer.CpuAddr();
935 const auto add_download = [&](VAddr start, VAddr end) { 514 const auto add_download = [&](VAddr start, VAddr end) {
936 const u64 new_offset = start - buffer_addr; 515 const u64 new_offset = start - buffer_addr;
@@ -944,92 +523,142 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
944 buffer_id, 523 buffer_id,
945 }); 524 });
946 // Align up to avoid cache conflicts 525 // Align up to avoid cache conflicts
947 constexpr u64 align = 8ULL; 526 constexpr u64 align = 64ULL;
948 constexpr u64 mask = ~(align - 1ULL); 527 constexpr u64 mask = ~(align - 1ULL);
949 total_size_bytes += (new_size + align - 1) & mask; 528 total_size_bytes += (new_size + align - 1) & mask;
950 largest_copy = std::max(largest_copy, new_size); 529 largest_copy = std::max(largest_copy, new_size);
951 }; 530 };
952 531
953 const VAddr start_address = buffer_addr + range_offset; 532 ForEachInRangeSet(common_ranges, cpu_addr_out, range_size, add_download);
954 const VAddr end_address = start_address + range_size;
955 ForEachWrittenRange(start_address, range_size, add_download);
956 const IntervalType subtract_interval{start_address, end_address};
957 common_ranges.subtract(subtract_interval);
958 }); 533 });
959 }); 534 });
960 } 535 }
961 } 536 }
962 committed_ranges.clear(); 537 committed_ranges.clear();
963 if (downloads.empty()) { 538 if (downloads.empty()) {
539 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
540 if (active_async_buffers) {
541 async_buffers.emplace_back(std::optional<Async_Buffer>{});
542 }
543 }
964 return; 544 return;
965 } 545 }
966 if constexpr (USE_MEMORY_MAPS) { 546 if (active_async_buffers) {
967 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); 547 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
968 runtime.PreCopyBarrier(); 548 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
969 for (auto& [copy, buffer_id] : downloads) { 549 boost::container::small_vector<BufferCopy, 4> normalized_copies;
970 // Have in mind the staging buffer offset for the copy 550 IntervalSet new_async_range{};
970 // Keep in mind the staging buffer offset for the copy 550 IntervalSet new_async_range{};
971 copy.dst_offset += download_staging.offset; 551 runtime.PreCopyBarrier();
972 const std::array copies{copy}; 552 for (auto& [copy, buffer_id] : downloads) {
973 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); 553 copy.dst_offset += download_staging.offset;
974 } 554 const std::array copies{copy};
975 runtime.PostCopyBarrier(); 555 BufferCopy second_copy{copy};
976 runtime.Finish(); 556 Buffer& buffer = slot_buffers[buffer_id];
977 for (const auto& [copy, buffer_id] : downloads) { 557 second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
978 const Buffer& buffer = slot_buffers[buffer_id]; 558 VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
979 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 559 const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
980 // Undo the modified offset 560 async_downloads += std::make_pair(base_interval, 1);
981 const u64 dst_offset = copy.dst_offset - download_staging.offset; 561 runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
982 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; 562 normalized_copies.push_back(second_copy);
983 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); 563 }
564 runtime.PostCopyBarrier();
565 pending_downloads.emplace_back(std::move(normalized_copies));
566 async_buffers.emplace_back(download_staging);
567 } else {
568 committed_ranges.clear();
569 uncommitted_ranges.clear();
984 } 570 }
985 } else { 571 } else {
986 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); 572 if constexpr (USE_MEMORY_MAPS) {
987 for (const auto& [copy, buffer_id] : downloads) { 573 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
988 Buffer& buffer = slot_buffers[buffer_id]; 574 runtime.PreCopyBarrier();
989 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); 575 for (auto& [copy, buffer_id] : downloads) {
990 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 576 // Have in mind the staging buffer offset for the copy
990 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; 576 // Keep in mind the staging buffer offset for the copy
991 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); 577 copy.dst_offset += download_staging.offset;
578 const std::array copies{copy};
579 runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
580 }
581 runtime.PostCopyBarrier();
582 runtime.Finish();
583 for (const auto& [copy, buffer_id] : downloads) {
584 const Buffer& buffer = slot_buffers[buffer_id];
585 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
586 // Undo the modified offset
587 const u64 dst_offset = copy.dst_offset - download_staging.offset;
588 const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
589 cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
590 }
591 } else {
592 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
593 for (const auto& [copy, buffer_id] : downloads) {
594 Buffer& buffer = slot_buffers[buffer_id];
595 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
596 const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
597 cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
598 }
992 } 599 }
993 } 600 }
994} 601}
995 602
996template <class P> 603template <class P>
997void BufferCache<P>::CommitAsyncFlushes() { 604void BufferCache<P>::CommitAsyncFlushes() {
998 if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { 605 CommitAsyncFlushesHigh();
999 CommitAsyncFlushesHigh();
1000 } else {
1001 uncommitted_ranges.clear();
1002 committed_ranges.clear();
1003 }
1004} 606}
1005 607
1006template <class P> 608template <class P>
1007void BufferCache<P>::PopAsyncFlushes() {} 609void BufferCache<P>::PopAsyncFlushes() {
610 MICROPROFILE_SCOPE(GPU_DownloadMemory);
611 PopAsyncBuffers();
612}
1008 613
1009template <class P> 614template <class P>
1010bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { 615void BufferCache<P>::PopAsyncBuffers() {
1011 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 616 if (async_buffers.empty()) {
1012 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 617 return;
1013 const BufferId image_id = page_table[page]; 618 }
1014 if (!image_id) { 619 if (!async_buffers.front().has_value()) {
1015 ++page; 620 async_buffers.pop_front();
1016 continue; 621 return;
1017 } 622 }
1018 Buffer& buffer = slot_buffers[image_id]; 623 if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
1019 if (buffer.IsRegionGpuModified(addr, size)) { 624 auto& downloads = pending_downloads.front();
1020 return true; 625 auto& async_buffer = async_buffers.front();
626 u8* base = async_buffer->mapped_span.data();
627 const size_t base_offset = async_buffer->offset;
628 for (const auto& copy : downloads) {
629 const VAddr cpu_addr = static_cast<VAddr>(copy.src_offset);
630 const u64 dst_offset = copy.dst_offset - base_offset;
631 const u8* read_mapped_memory = base + dst_offset;
632 ForEachInOverlapCounter(
633 async_downloads, cpu_addr, copy.size, [&](VAddr start, VAddr end, int count) {
634 cpu_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - cpu_addr],
635 end - start);
636 if (count == 1) {
637 const IntervalType base_interval{start, end};
638 common_ranges.subtract(base_interval);
639 }
640 });
641 const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size};
642 RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1);
1021 } 643 }
1022 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); 644 async_buffers_death_ring.emplace_back(*async_buffer);
1023 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 645 async_buffers.pop_front();
646 pending_downloads.pop_front();
1024 } 647 }
1025 return false; 648}
649
650template <class P>
651bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
652 bool is_dirty = false;
653 ForEachInRangeSet(common_ranges, addr, size, [&](VAddr, VAddr) { is_dirty = true; });
654 return is_dirty;
1026} 655}
1027 656
1028template <class P> 657template <class P>
1029bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) { 658bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1030 const VAddr end_addr = addr + size; 659 const VAddr end_addr = addr + size;
1031 const u64 page_end = Common::DivCeil(end_addr, YUZU_PAGESIZE); 660 const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1032 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) { 661 for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
1033 const BufferId buffer_id = page_table[page]; 662 const BufferId buffer_id = page_table[page];
1034 if (!buffer_id) { 663 if (!buffer_id) {
1035 ++page; 664 ++page;
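
Note: the async_downloads container used in the previous hunk behaves like a boost::icl::interval_map<VAddr, int>: adding std::make_pair(interval, 1) increments a reference count over exactly the covered range, and overlapping additions aggregate. A standalone sketch of that aggregation (addresses are made up; the exact container alias is an inference from the += calls above):

#include <boost/icl/interval_map.hpp>
#include <cstdint>
#include <iostream>
#include <utility>

int main() {
    using VAddr = std::uint64_t;
    using Counter = boost::icl::interval_map<VAddr, int>;
    using Interval = Counter::interval_type;

    Counter async_downloads;
    async_downloads += std::make_pair(Interval::right_open(0x0000, 0x4000), 1);
    async_downloads += std::make_pair(Interval::right_open(0x2000, 0x6000), 1);
    // Now: [0, 0x2000) -> 1, [0x2000, 0x4000) -> 2, [0x4000, 0x6000) -> 1
    for (const auto& [interval, count] : async_downloads) {
        std::cout << std::hex << interval.lower() << ".." << interval.upper()
                  << " -> " << std::dec << count << '\n';
    }
}
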
@@ -1041,28 +670,14 @@ bool BufferCache<P>::IsRegionRegistered(VAddr addr, size_t size) {
1041 if (buf_start_addr < end_addr && addr < buf_end_addr) { 670 if (buf_start_addr < end_addr && addr < buf_end_addr) {
1042 return true; 671 return true;
1043 } 672 }
1044 page = Common::DivCeil(end_addr, YUZU_PAGESIZE); 673 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
1045 } 674 }
1046 return false; 675 return false;
1047} 676}
1048 677
1049template <class P> 678template <class P>
1050bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { 679bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
1051 const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE); 680 return memory_tracker.IsRegionCpuModified(addr, size);
1052 for (u64 page = addr >> YUZU_PAGEBITS; page < page_end;) {
1053 const BufferId image_id = page_table[page];
1054 if (!image_id) {
1055 ++page;
1056 continue;
1057 }
1058 Buffer& buffer = slot_buffers[image_id];
1059 if (buffer.IsRegionCpuModified(addr, size)) {
1060 return true;
1061 }
1062 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
1063 page = Common::DivCeil(end_addr, YUZU_PAGESIZE);
1064 }
1065 return false;
1066} 681}
1067 682
1068template <class P> 683template <class P>
@@ -1072,7 +687,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
1072 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 687 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
1073 const u32 size = index_buffer.size; 688 const u32 size = index_buffer.size;
1074 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 689 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1075 if (!draw_state.inline_index_draw_indexes.empty()) { 690 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1076 if constexpr (USE_MEMORY_MAPS) { 691 if constexpr (USE_MEMORY_MAPS) {
1077 auto upload_staging = runtime.UploadStagingBuffer(size); 692 auto upload_staging = runtime.UploadStagingBuffer(size);
1078 std::array<BufferCopy, 1> copies{ 693 std::array<BufferCopy, 1> copies{
@@ -1155,7 +770,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
1155 TouchBuffer(buffer, binding.buffer_id); 770 TouchBuffer(buffer, binding.buffer_id);
1156 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 771 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
1157 size <= uniform_buffer_skip_cache_size && 772 size <= uniform_buffer_skip_cache_size &&
1158 !buffer.IsRegionGpuModified(cpu_addr, size); 773 !memory_tracker.IsRegionGpuModified(cpu_addr, size);
1159 if (use_fast_buffer) { 774 if (use_fast_buffer) {
1160 if constexpr (IS_OPENGL) { 775 if constexpr (IS_OPENGL) {
1161 if (runtime.HasFastBufferSubData()) { 776 if (runtime.HasFastBufferSubData()) {
@@ -1378,27 +993,36 @@ void BufferCache<P>::UpdateIndexBuffer() {
1378 // We have to check for the dirty flags and index count 993 // We have to check for the dirty flags and index count
1379 // The index count is currently changed without updating the dirty flags 994 // The index count is currently changed without updating the dirty flags
1380 const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); 995 const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
1381 const auto& index_array = draw_state.index_buffer; 996 const auto& index_buffer_ref = draw_state.index_buffer;
1382 auto& flags = maxwell3d->dirty.flags; 997 auto& flags = maxwell3d->dirty.flags;
1383 if (!flags[Dirty::IndexBuffer]) { 998 if (!flags[Dirty::IndexBuffer]) {
1384 return; 999 return;
1385 } 1000 }
1386 flags[Dirty::IndexBuffer] = false; 1001 flags[Dirty::IndexBuffer] = false;
1387 last_index_count = index_array.count; 1002 if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
1388 if (!draw_state.inline_index_draw_indexes.empty()) {
1389 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size()); 1003 auto inline_index_size = static_cast<u32>(draw_state.inline_index_draw_indexes.size());
1004 u32 buffer_size = Common::AlignUp(inline_index_size, CACHING_PAGESIZE);
1005 if (inline_buffer_id == NULL_BUFFER_ID) [[unlikely]] {
1006 inline_buffer_id = CreateBuffer(0, buffer_size);
1007 }
1008 if (slot_buffers[inline_buffer_id].SizeBytes() < buffer_size) [[unlikely]] {
1009 slot_buffers.erase(inline_buffer_id);
1010 inline_buffer_id = CreateBuffer(0, buffer_size);
1011 }
1390 index_buffer = Binding{ 1012 index_buffer = Binding{
1391 .cpu_addr = 0, 1013 .cpu_addr = 0,
1392 .size = inline_index_size, 1014 .size = inline_index_size,
1393 .buffer_id = CreateBuffer(0, inline_index_size), 1015 .buffer_id = inline_buffer_id,
1394 }; 1016 };
1395 return; 1017 return;
1396 } 1018 }
1397 const GPUVAddr gpu_addr_begin = index_array.StartAddress(); 1019
1398 const GPUVAddr gpu_addr_end = index_array.EndAddress(); 1020 const GPUVAddr gpu_addr_begin = index_buffer_ref.StartAddress();
1021 const GPUVAddr gpu_addr_end = index_buffer_ref.EndAddress();
1399 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1022 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1400 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); 1023 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1401 const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); 1024 const u32 draw_size =
1025 (index_buffer_ref.count + index_buffer_ref.first) * index_buffer_ref.FormatSizeInBytes();
1402 const u32 size = std::min(address_size, draw_size); 1026 const u32 size = std::min(address_size, draw_size);
1403 if (size == 0 || !cpu_addr) { 1027 if (size == 0 || !cpu_addr) {
1404 index_buffer = NULL_BINDING; 1028 index_buffer = NULL_BINDING;
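
Note: UpdateIndexBuffer above adopts a grow-only scratch buffer for inline index draws: round the needed size up to a caching page and recreate only when capacity is exceeded, so page quantization keeps churn low. The shape of that pattern as a sketch; ScratchBuffer stands in for the real slot_buffers/CreateBuffer machinery:

#include <cstdint>

constexpr std::uint32_t AlignUp(std::uint32_t value, std::uint32_t align) {
    return (value + align - 1) & ~(align - 1); // align must be a power of two
}

struct ScratchBuffer {
    std::uint32_t capacity = 0;
    void Recreate(std::uint32_t size) { capacity = size; } // stands in for CreateBuffer
};

void EnsureFits(ScratchBuffer& buf, std::uint32_t needed, std::uint32_t page_size) {
    const std::uint32_t rounded = AlignUp(needed, page_size);
    if (buf.capacity < rounded) {
        buf.Recreate(rounded); // rare: sizes are page-quantized, so churn stays low
    }
}
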
@@ -1434,17 +1058,15 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
1434 const GPUVAddr gpu_addr_begin = array.Address(); 1058 const GPUVAddr gpu_addr_begin = array.Address();
1435 const GPUVAddr gpu_addr_end = limit.Address() + 1; 1059 const GPUVAddr gpu_addr_end = limit.Address() + 1;
1436 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin); 1060 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
1437 u32 address_size = static_cast<u32>( 1061 const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
1438 std::min(gpu_addr_end - gpu_addr_begin, static_cast<u64>(std::numeric_limits<u32>::max()))); 1062 u32 size = address_size; // TODO: Analyze stride and number of vertices
1439 if (array.enable == 0 || address_size == 0 || !cpu_addr) { 1063 if (array.enable == 0 || size == 0 || !cpu_addr) {
1440 vertex_buffers[index] = NULL_BINDING; 1064 vertex_buffers[index] = NULL_BINDING;
1441 return; 1065 return;
1442 } 1066 }
1443 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) { 1067 if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end)) {
1444 address_size = 1068 size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
1445 static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, address_size));
1446 } 1069 }
1447 const u32 size = address_size; // TODO: Analyze stride and number of vertices
1448 vertex_buffers[index] = Binding{ 1070 vertex_buffers[index] = Binding{
1449 .cpu_addr = *cpu_addr, 1071 .cpu_addr = *cpu_addr,
1450 .size = size, 1072 .size = size,
@@ -1591,17 +1213,16 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
1591 1213
1592template <class P> 1214template <class P>
1593void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { 1215void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
1594 Buffer& buffer = slot_buffers[buffer_id]; 1216 memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
1595 buffer.MarkRegionAsGpuModified(cpu_addr, size); 1217
1218 if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
1219 SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
1220 }
1596 1221
1597 const IntervalType base_interval{cpu_addr, cpu_addr + size}; 1222 const IntervalType base_interval{cpu_addr, cpu_addr + size};
1598 common_ranges.add(base_interval); 1223 common_ranges.add(base_interval);
1599
1600 const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
1601 if (!is_async) {
1602 return;
1603 }
1604 uncommitted_ranges.add(base_interval); 1224 uncommitted_ranges.add(base_interval);
1225 pending_ranges.add(base_interval);
1605} 1226}
1606 1227
1607template <class P> 1228template <class P>
@@ -1609,7 +1230,7 @@ BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) {
1609 if (cpu_addr == 0) { 1230 if (cpu_addr == 0) {
1610 return NULL_BUFFER_ID; 1231 return NULL_BUFFER_ID;
1611 } 1232 }
1612 const u64 page = cpu_addr >> YUZU_PAGEBITS; 1233 const u64 page = cpu_addr >> CACHING_PAGEBITS;
1613 const BufferId buffer_id = page_table[page]; 1234 const BufferId buffer_id = page_table[page];
1614 if (!buffer_id) { 1235 if (!buffer_id) {
1615 return CreateBuffer(cpu_addr, size); 1236 return CreateBuffer(cpu_addr, size);
@@ -1638,9 +1259,9 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1638 .has_stream_leap = has_stream_leap, 1259 .has_stream_leap = has_stream_leap,
1639 }; 1260 };
1640 } 1261 }
1641 for (; cpu_addr >> YUZU_PAGEBITS < Common::DivCeil(end, YUZU_PAGESIZE); 1262 for (; cpu_addr >> CACHING_PAGEBITS < Common::DivCeil(end, CACHING_PAGESIZE);
1642 cpu_addr += YUZU_PAGESIZE) { 1263 cpu_addr += CACHING_PAGESIZE) {
1643 const BufferId overlap_id = page_table[cpu_addr >> YUZU_PAGEBITS]; 1264 const BufferId overlap_id = page_table[cpu_addr >> CACHING_PAGEBITS];
1644 if (!overlap_id) { 1265 if (!overlap_id) {
1645 continue; 1266 continue;
1646 } 1267 }
@@ -1666,11 +1287,11 @@ typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu
1666 // as a stream buffer. Increase the size to skip constantly recreating buffers. 1287 // as a stream buffer. Increase the size to skip constantly recreating buffers.
1667 has_stream_leap = true; 1288 has_stream_leap = true;
1668 if (expands_right) { 1289 if (expands_right) {
1669 begin -= YUZU_PAGESIZE * 256; 1290 begin -= CACHING_PAGESIZE * 256;
1670 cpu_addr = begin; 1291 cpu_addr = begin;
1671 } 1292 }
1672 if (expands_left) { 1293 if (expands_left) {
1673 end += YUZU_PAGESIZE * 256; 1294 end += CACHING_PAGESIZE * 256;
1674 } 1295 }
1675 } 1296 }
1676 } 1297 }
@@ -1690,25 +1311,22 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
1690 if (accumulate_stream_score) { 1311 if (accumulate_stream_score) {
1691 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); 1312 new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1);
1692 } 1313 }
1693 std::vector<BufferCopy> copies; 1314 boost::container::small_vector<BufferCopy, 1> copies;
1694 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); 1315 const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr();
1695 overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { 1316 copies.push_back(BufferCopy{
1696 copies.push_back(BufferCopy{ 1317 .src_offset = 0,
1697 .src_offset = begin, 1318 .dst_offset = dst_base_offset,
1698 .dst_offset = dst_base_offset + begin, 1319 .size = overlap.SizeBytes(),
1699 .size = range_size,
1700 });
1701 new_buffer.UnmarkRegionAsCpuModified(begin, range_size);
1702 new_buffer.MarkRegionAsGpuModified(begin, range_size);
1703 }); 1320 });
1704 if (!copies.empty()) { 1321 runtime.CopyBuffer(new_buffer, overlap, copies);
1705 runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); 1322 DeleteBuffer(overlap_id, true);
1706 }
1707 DeleteBuffer(overlap_id);
1708} 1323}
1709 1324
1710template <class P> 1325template <class P>
1711BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { 1326BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1327 VAddr cpu_addr_end = Common::AlignUp(cpu_addr + wanted_size, CACHING_PAGESIZE);
1328 cpu_addr = Common::AlignDown(cpu_addr, CACHING_PAGESIZE);
1329 wanted_size = static_cast<u32>(cpu_addr_end - cpu_addr);
1712 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1330 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1713 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1331 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1714 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1332 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
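
Note: CreateBuffer now quantizes the requested range to caching-page granularity before resolving overlaps. A worked example, assuming CACHING_PAGESIZE is the 64 KiB the old YUZU_PAGESIZE used (the new constant is defined in buffer_cache_base.h, not shown in this diff):

#include <cstdint>

int main() {
    constexpr std::uint64_t PAGE = 1ULL << 16; // assumed 64 KiB caching page
    std::uint64_t cpu_addr = 0x12345678;
    std::uint32_t wanted_size = 0x100;

    const std::uint64_t end = (cpu_addr + wanted_size + PAGE - 1) & ~(PAGE - 1); // AlignUp
    cpu_addr &= ~(PAGE - 1);                                                     // AlignDown
    wanted_size = static_cast<std::uint32_t>(end - cpu_addr);
    // cpu_addr = 0x12340000, end = 0x12350000, wanted_size = 0x10000 (one page)
}
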
@@ -1718,7 +1336,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1718 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1336 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1719 } 1337 }
1720 Register(new_buffer_id); 1338 Register(new_buffer_id);
1721 TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); 1339 TouchBuffer(new_buffer, new_buffer_id);
1722 return new_buffer_id; 1340 return new_buffer_id;
1723} 1341}
1724 1342
@@ -1746,8 +1364,8 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1746 } 1364 }
1747 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1365 const VAddr cpu_addr_begin = buffer.CpuAddr();
1748 const VAddr cpu_addr_end = cpu_addr_begin + size; 1366 const VAddr cpu_addr_end = cpu_addr_begin + size;
1749 const u64 page_begin = cpu_addr_begin / YUZU_PAGESIZE; 1367 const u64 page_begin = cpu_addr_begin / CACHING_PAGESIZE;
1750 const u64 page_end = Common::DivCeil(cpu_addr_end, YUZU_PAGESIZE); 1368 const u64 page_end = Common::DivCeil(cpu_addr_end, CACHING_PAGESIZE);
1751 for (u64 page = page_begin; page != page_end; ++page) { 1369 for (u64 page = page_begin; page != page_end; ++page) {
1752 if constexpr (insert) { 1370 if constexpr (insert) {
1753 page_table[page] = buffer_id; 1371 page_table[page] = buffer_id;
@@ -1766,9 +1384,6 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1766 1384
1767template <class P> 1385template <class P>
1768bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1386bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1769 if (buffer.CpuAddr() == 0) {
1770 return true;
1771 }
1772 return SynchronizeBufferImpl(buffer, cpu_addr, size); 1387 return SynchronizeBufferImpl(buffer, cpu_addr, size);
1773} 1388}
1774 1389
@@ -1777,10 +1392,11 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1777 boost::container::small_vector<BufferCopy, 4> copies; 1392 boost::container::small_vector<BufferCopy, 4> copies;
1778 u64 total_size_bytes = 0; 1393 u64 total_size_bytes = 0;
1779 u64 largest_copy = 0; 1394 u64 largest_copy = 0;
1780 buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { 1395 VAddr buffer_start = buffer.CpuAddr();
1396 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1781 copies.push_back(BufferCopy{ 1397 copies.push_back(BufferCopy{
1782 .src_offset = total_size_bytes, 1398 .src_offset = total_size_bytes,
1783 .dst_offset = range_offset, 1399 .dst_offset = cpu_addr_out - buffer_start,
1784 .size = range_size, 1400 .size = range_size,
1785 }); 1401 });
1786 total_size_bytes += range_size; 1402 total_size_bytes += range_size;
@@ -1795,6 +1411,51 @@ bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
1795} 1411}
1796 1412
1797template <class P> 1413template <class P>
1414bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size) {
1415 boost::container::small_vector<BufferCopy, 4> copies;
1416 u64 total_size_bytes = 0;
1417 u64 largest_copy = 0;
1418 IntervalSet found_sets{};
1419 auto make_copies = [&] {
1420 for (auto& interval : found_sets) {
1421 const std::size_t sub_size = interval.upper() - interval.lower();
1422 const VAddr cpu_addr_ = interval.lower();
1423 copies.push_back(BufferCopy{
1424 .src_offset = total_size_bytes,
1425 .dst_offset = cpu_addr_ - buffer.CpuAddr(),
1426 .size = sub_size,
1427 });
1428 total_size_bytes += sub_size;
1429 largest_copy = std::max<u64>(largest_copy, sub_size);
1430 }
1431 const std::span<BufferCopy> copies_span(copies.data(), copies.size());
1432 UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
1433 };
1434 memory_tracker.ForEachUploadRange(cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
1435 const VAddr base_adr = cpu_addr_out;
1436 const VAddr end_adr = base_adr + range_size;
1437 const IntervalType add_interval{base_adr, end_adr};
1438 found_sets.add(add_interval);
1439 });
1440 if (found_sets.empty()) {
1441 return true;
1442 }
1443 const IntervalType search_interval{cpu_addr, cpu_addr + size};
1444 auto it = common_ranges.lower_bound(search_interval);
1445 auto it_end = common_ranges.upper_bound(search_interval);
1446 if (it == common_ranges.end()) {
1447 make_copies();
1448 return false;
1449 }
1450 while (it != it_end) {
1451 found_sets.subtract(*it);
1452 it++;
1453 }
1454 make_copies();
1455 return false;
1456}
1457
1458template <class P>
1798void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, 1459void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
1799 std::span<BufferCopy> copies) { 1460 std::span<BufferCopy> copies) {
1800 if constexpr (USE_MEMORY_MAPS) { 1461 if constexpr (USE_MEMORY_MAPS) {
@@ -1805,39 +1466,45 @@ void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 larg
1805} 1466}
1806 1467
1807template <class P> 1468template <class P>
1808void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, 1469void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
1809 std::span<const BufferCopy> copies) { 1470 [[maybe_unused]] u64 largest_copy,
1810 std::span<u8> immediate_buffer; 1471 [[maybe_unused]] std::span<const BufferCopy> copies) {
1811 for (const BufferCopy& copy : copies) { 1472 if constexpr (!USE_MEMORY_MAPS) {
1812 std::span<const u8> upload_span; 1473 std::span<u8> immediate_buffer;
1813 const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; 1474 for (const BufferCopy& copy : copies) {
1814 if (IsRangeGranular(cpu_addr, copy.size)) { 1475 std::span<const u8> upload_span;
-            upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
-        } else {
-            if (immediate_buffer.empty()) {
-                immediate_buffer = ImmediateBuffer(largest_copy);
-            }
-            cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
-            upload_span = immediate_buffer.subspan(0, copy.size);
-        }
-        buffer.ImmediateUpload(copy.dst_offset, upload_span);
-    }
+            const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+            if (IsRangeGranular(cpu_addr, copy.size)) {
+                upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size);
+            } else {
+                if (immediate_buffer.empty()) {
+                    immediate_buffer = ImmediateBuffer(largest_copy);
+                }
+                cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+                upload_span = immediate_buffer.subspan(0, copy.size);
+            }
+            buffer.ImmediateUpload(copy.dst_offset, upload_span);
+        }
+    }
 }
 
 template <class P>
-void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
-                                        std::span<BufferCopy> copies) {
-    auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
-    const std::span<u8> staging_pointer = upload_staging.mapped_span;
-    for (BufferCopy& copy : copies) {
-        u8* const src_pointer = staging_pointer.data() + copy.src_offset;
-        const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
-        cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
-
-        // Apply the staging offset
-        copy.src_offset += upload_staging.offset;
-    }
-    runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
+                                        [[maybe_unused]] u64 total_size_bytes,
+                                        [[maybe_unused]] std::span<BufferCopy> copies) {
+    if constexpr (USE_MEMORY_MAPS) {
+        auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
+        const std::span<u8> staging_pointer = upload_staging.mapped_span;
+        for (BufferCopy& copy : copies) {
+            u8* const src_pointer = staging_pointer.data() + copy.src_offset;
+            const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset;
+            cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size);
+
+            // Apply the staging offset
+            copy.src_offset += upload_staging.offset;
+        }
+        runtime.CopyBuffer(buffer, upload_staging.buffer, copies);
+    }
 }
 
 template <class P>
@@ -1847,7 +1514,9 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
     if (!is_dirty) {
         return false;
     }
-    if (!IsRegionGpuModified(dest_address, copy_size)) {
+    VAddr aligned_start = Common::AlignDown(dest_address, YUZU_PAGESIZE);
+    VAddr aligned_end = Common::AlignUp(dest_address + copy_size, YUZU_PAGESIZE);
+    if (!IsRegionGpuModified(aligned_start, aligned_end - aligned_start)) {
         return false;
     }
 
@@ -1886,30 +1555,31 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
     boost::container::small_vector<BufferCopy, 1> copies;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    buffer.ForEachDownloadRangeAndClear(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
-        const VAddr buffer_addr = buffer.CpuAddr();
-        const auto add_download = [&](VAddr start, VAddr end) {
-            const u64 new_offset = start - buffer_addr;
-            const u64 new_size = end - start;
-            copies.push_back(BufferCopy{
-                .src_offset = new_offset,
-                .dst_offset = total_size_bytes,
-                .size = new_size,
-            });
-            // Align up to avoid cache conflicts
-            constexpr u64 align = 256ULL;
-            constexpr u64 mask = ~(align - 1ULL);
-            total_size_bytes += (new_size + align - 1) & mask;
-            largest_copy = std::max(largest_copy, new_size);
-        };
-
-        const VAddr start_address = buffer_addr + range_offset;
-        const VAddr end_address = start_address + range_size;
-        ForEachWrittenRange(start_address, range_size, add_download);
-        const IntervalType subtract_interval{start_address, end_address};
-        ClearDownload(subtract_interval);
-        common_ranges.subtract(subtract_interval);
-    });
+    memory_tracker.ForEachDownloadRangeAndClear(
+        cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
+            const VAddr buffer_addr = buffer.CpuAddr();
+            const auto add_download = [&](VAddr start, VAddr end) {
+                const u64 new_offset = start - buffer_addr;
+                const u64 new_size = end - start;
+                copies.push_back(BufferCopy{
+                    .src_offset = new_offset,
+                    .dst_offset = total_size_bytes,
+                    .size = new_size,
+                });
+                // Align up to avoid cache conflicts
+                constexpr u64 align = 64ULL;
+                constexpr u64 mask = ~(align - 1ULL);
+                total_size_bytes += (new_size + align - 1) & mask;
+                largest_copy = std::max(largest_copy, new_size);
+            };
+
+            const VAddr start_address = cpu_addr_out;
+            const VAddr end_address = start_address + range_size;
+            ForEachInRangeSet(common_ranges, start_address, range_size, add_download);
+            const IntervalType subtract_interval{start_address, end_address};
+            ClearDownload(subtract_interval);
+            common_ranges.subtract(subtract_interval);
+        });
     if (total_size_bytes == 0) {
         return;
     }
@@ -1943,7 +1613,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
 }
 
 template <class P>
-void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
+void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
     const auto scalar_replace = [buffer_id](Binding& binding) {
         if (binding.buffer_id == buffer_id) {
             binding.buffer_id = BufferId{};
@@ -1962,8 +1632,10 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
     std::erase(cached_write_buffer_ids, buffer_id);
 
     // Mark the whole buffer as CPU written to stop tracking CPU writes
-    Buffer& buffer = slot_buffers[buffer_id];
-    buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+    if (!do_not_mark) {
+        Buffer& buffer = slot_buffers[buffer_id];
+        memory_tracker.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes());
+    }
 
     Unregister(buffer_id);
     delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
@@ -2011,7 +1683,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
         LOG_WARNING(HW_GPU, "Failed to find storage buffer for cbuf index {}", cbuf_index);
         return NULL_BINDING;
     }
-    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE);
+    const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, YUZU_PAGESIZE);
     const Binding binding{
         .cpu_addr = *cpu_addr,
         .size = is_written ? size : static_cast<u32>(cpu_end - *cpu_addr),
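A quick sketch, not from the commit: the download path above packs every copy into a single
staging buffer, rounding each copy size up to a 64-byte boundary with the classic
(size + align - 1) & ~(align - 1) idiom seen in add_download. AlignUp64 below is a
hypothetical helper name used for illustration only.

    #include <cassert>
    #include <cstdint>

    constexpr std::uint64_t AlignUp64(std::uint64_t size) {
        constexpr std::uint64_t align = 64ULL;
        constexpr std::uint64_t mask = ~(align - 1ULL);
        return (size + align - 1) & mask; // same arithmetic as in add_download
    }

    int main() {
        assert(AlignUp64(1) == 64);   // a tiny copy still reserves a full 64-byte slot
        assert(AlignUp64(64) == 64);  // already-aligned sizes are unchanged
        assert(AlignUp64(65) == 128); // partial slots round up to the next boundary
    }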
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
new file mode 100644
index 000000000..656baa550
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -0,0 +1,580 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <array>
8#include <functional>
9#include <memory>
10#include <mutex>
11#include <numeric>
12#include <span>
13#include <unordered_map>
14#include <vector>
15
16#include <boost/container/small_vector.hpp>
17#define BOOST_NO_MT
18#include <boost/pool/detail/mutex.hpp>
19#undef BOOST_NO_MT
20#include <boost/icl/interval.hpp>
21#include <boost/icl/interval_base_set.hpp>
22#include <boost/icl/interval_set.hpp>
23#include <boost/icl/split_interval_map.hpp>
24#include <boost/pool/pool.hpp>
25#include <boost/pool/pool_alloc.hpp>
26#include <boost/pool/poolfwd.hpp>
27
28#include "common/common_types.h"
29#include "common/div_ceil.h"
30#include "common/literals.h"
31#include "common/lru_cache.h"
32#include "common/microprofile.h"
33#include "common/scope_exit.h"
34#include "common/settings.h"
35#include "core/memory.h"
36#include "video_core/buffer_cache/buffer_base.h"
37#include "video_core/control/channel_state_cache.h"
38#include "video_core/delayed_destruction_ring.h"
39#include "video_core/dirty_flags.h"
40#include "video_core/engines/draw_manager.h"
41#include "video_core/engines/kepler_compute.h"
42#include "video_core/engines/maxwell_3d.h"
43#include "video_core/memory_manager.h"
44#include "video_core/rasterizer_interface.h"
45#include "video_core/surface.h"
46#include "video_core/texture_cache/slot_vector.h"
47#include "video_core/texture_cache/types.h"
48
49namespace boost {
50template <typename T>
51class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
52}
53
54namespace VideoCommon {
55
56MICROPROFILE_DECLARE(GPU_PrepareBuffers);
57MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
58MICROPROFILE_DECLARE(GPU_DownloadMemory);
59
60using BufferId = SlotId;
61
62using VideoCore::Surface::PixelFormat;
63using namespace Common::Literals;
64
65constexpr u32 NUM_VERTEX_BUFFERS = 32;
66constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4;
67constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18;
68constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
69constexpr u32 NUM_STORAGE_BUFFERS = 16;
70constexpr u32 NUM_TEXTURE_BUFFERS = 16;
71constexpr u32 NUM_STAGES = 5;
72
73using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
74using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
75
76enum class ObtainBufferSynchronize : u32 {
77 NoSynchronize = 0,
78 FullSynchronize = 1,
79 SynchronizeNoDirty = 2,
80};
81
82enum class ObtainBufferOperation : u32 {
83 DoNothing = 0,
84 MarkAsWritten = 1,
85 DiscardWrite = 2,
86 MarkQuery = 3,
87};
88
89template <typename P>
90class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
91 // Page size for caching purposes.
 92 // This is unrelated to the CPU page size and can be tuned to whatever performs best.
93 static constexpr u32 CACHING_PAGEBITS = 16;
94 static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
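As an illustration of the constants above (the addresses in the asserts are arbitrary
examples): with CACHING_PAGEBITS == 16 the cache works on 64 KiB pages, so an address
splits into a page-table index and an in-page offset like this:

    static_assert((0x12345678ULL >> 16) == 0x1234ULL);       // caching-page index
    static_assert((0x12345678ULL & 0xFFFFULL) == 0x5678ULL); // offset inside the page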
95
96 static constexpr bool IS_OPENGL = P::IS_OPENGL;
97 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS =
98 P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS;
99 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT =
100 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
101 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
102 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
103 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
104 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
105 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
106
107 static constexpr BufferId NULL_BUFFER_ID{0};
108
109 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 512_MiB;
110 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB;
111 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
112
113 // Debug Flags.
114
115 static constexpr bool DISABLE_DOWNLOADS = true;
116
117 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
118
119 using Runtime = typename P::Runtime;
120 using Buffer = typename P::Buffer;
121 using Async_Buffer = typename P::Async_Buffer;
122 using MemoryTracker = typename P::MemoryTracker;
123
124 using IntervalCompare = std::less<VAddr>;
125 using IntervalInstance = boost::icl::interval_type_default<VAddr, std::less>;
126 using IntervalAllocator = boost::fast_pool_allocator<VAddr>;
127 using IntervalSet = boost::icl::interval_set<VAddr>;
128 using IntervalType = typename IntervalSet::interval_type;
129
130 template <typename Type>
131 struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
132 // types
133 typedef counter_add_functor<Type> type;
134 typedef boost::icl::identity_based_inplace_combine<Type> base_type;
135
136 // public member functions
137 void operator()(Type& current, const Type& added) const {
138 current += added;
139 if (current < base_type::identity_element()) {
140 current = base_type::identity_element();
141 }
142 }
143
144 // public static functions
145 static void version(Type&){};
146 };
147
148 using OverlapCombine = counter_add_functor<int>;
149 using OverlapSection = boost::icl::inter_section<int>;
150 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
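The combine functor above accumulates per-interval reference counts but clamps them at the
identity element (0 for int), so over-subtraction can never drive a count negative. A
minimal sketch of the same rule, detached from boost::icl:

    #include <cassert>

    int main() {
        int count = 1;
        const auto combine = [](int& current, int added) {
            current += added;
            if (current < 0) { // identity_element() of integer addition
                current = 0;
            }
        };
        combine(count, -3); // over-subtracting clamps to 0 instead of reaching -2
        assert(count == 0);
    }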
151
152 struct Empty {};
153
154 struct OverlapResult {
155 std::vector<BufferId> ids;
156 VAddr begin;
157 VAddr end;
158 bool has_stream_leap = false;
159 };
160
161 struct Binding {
162 VAddr cpu_addr{};
163 u32 size{};
164 BufferId buffer_id;
165 };
166
167 struct TextureBufferBinding : Binding {
168 PixelFormat format;
169 };
170
171 static constexpr Binding NULL_BINDING{
172 .cpu_addr = 0,
173 .size = 0,
174 .buffer_id = NULL_BUFFER_ID,
175 };
176
177public:
178 static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
179
180 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
181 Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
182
183 void TickFrame();
184
185 void WriteMemory(VAddr cpu_addr, u64 size);
186
187 void CachedWriteMemory(VAddr cpu_addr, u64 size);
188
189 void DownloadMemory(VAddr cpu_addr, u64 size);
190
191 bool InlineMemory(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
192
193 void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size);
194
195 void DisableGraphicsUniformBuffer(size_t stage, u32 index);
196
197 void UpdateGraphicsBuffers(bool is_indexed);
198
199 void UpdateComputeBuffers();
200
201 void BindHostGeometryBuffers(bool is_indexed);
202
203 void BindHostStageBuffers(size_t stage);
204
205 void BindHostComputeBuffers();
206
207 void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
208 const UniformBufferSizes* sizes);
209
210 void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes);
211
212 void UnbindGraphicsStorageBuffers(size_t stage);
213
214 void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
215 bool is_written);
216
217 void UnbindGraphicsTextureBuffers(size_t stage);
218
219 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
220 PixelFormat format, bool is_written, bool is_image);
221
222 void UnbindComputeStorageBuffers();
223
224 void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset,
225 bool is_written);
226
227 void UnbindComputeTextureBuffers();
228
229 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
230 bool is_written, bool is_image);
231
232 [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size,
233 ObtainBufferSynchronize sync_info,
234 ObtainBufferOperation post_op);
235 void FlushCachedWrites();
236
237 /// Return true when there are uncommitted buffers to be downloaded
238 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
239
240 void AccumulateFlushes();
241
242 /// Return true when the caller should wait for async downloads
243 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
244
245 /// Commit asynchronous downloads
246 void CommitAsyncFlushes();
247 void CommitAsyncFlushesHigh();
248
249 /// Pop asynchronous downloads
250 void PopAsyncFlushes();
251 void PopAsyncBuffers();
252
253 bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount);
254
255 bool DMAClear(GPUVAddr src_address, u64 amount, u32 value);
256
257 /// Return true when a CPU region is modified from the GPU
258 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
259
260 /// Return true when a region is registered on the cache
261 [[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);
262
263 /// Return true when a CPU region is modified from the CPU
264 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
265
266 void SetDrawIndirect(
267 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect_) {
268 current_draw_indirect = current_draw_indirect_;
269 }
270
271 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectCount();
272
273 [[nodiscard]] std::pair<Buffer*, u32> GetDrawIndirectBuffer();
274
275 std::recursive_mutex mutex;
276 Runtime& runtime;
277
278private:
279 template <typename Func>
280 static void ForEachEnabledBit(u32 enabled_mask, Func&& func) {
281 for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) {
282 const int disabled_bits = std::countr_zero(enabled_mask);
283 index += disabled_bits;
284 enabled_mask >>= disabled_bits;
285 func(index);
286 }
287 }
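A worked example of the set-bit walk above, assuming C++20 <bit>: for mask 0b101001 the
callback fires for indices 0, 3 and 5, skipping the runs of zeros in O(set bits):

    #include <bit>
    #include <cassert>
    #include <vector>

    int main() {
        std::vector<unsigned> seen;
        unsigned mask = 0b101001u;
        for (unsigned index = 0; mask != 0; ++index, mask >>= 1) {
            const int skip = std::countr_zero(mask); // jump over disabled bits
            index += static_cast<unsigned>(skip);
            mask >>= skip;
            seen.push_back(index); // stands in for func(index)
        }
        assert((seen == std::vector<unsigned>{0, 3, 5}));
    }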
288
289 template <typename Func>
290 void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) {
291 const u64 page_end = Common::DivCeil(cpu_addr + size, CACHING_PAGESIZE);
292 for (u64 page = cpu_addr >> CACHING_PAGEBITS; page < page_end;) {
293 const BufferId buffer_id = page_table[page];
294 if (!buffer_id) {
295 ++page;
296 continue;
297 }
298 Buffer& buffer = slot_buffers[buffer_id];
299 func(buffer_id, buffer);
300
301 const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
302 page = Common::DivCeil(end_addr, CACHING_PAGESIZE);
303 }
304 }
305
306 template <typename Func>
307 void ForEachInRangeSet(IntervalSet& current_range, VAddr cpu_addr, u64 size, Func&& func) {
308 const VAddr start_address = cpu_addr;
309 const VAddr end_address = start_address + size;
310 const IntervalType search_interval{start_address, end_address};
311 auto it = current_range.lower_bound(search_interval);
312 if (it == current_range.end()) {
313 return;
314 }
315 auto end_it = current_range.upper_bound(search_interval);
316 for (; it != end_it; it++) {
317 VAddr inter_addr_end = it->upper();
318 VAddr inter_addr = it->lower();
319 if (inter_addr_end > end_address) {
320 inter_addr_end = end_address;
321 }
322 if (inter_addr < start_address) {
323 inter_addr = start_address;
324 }
325 func(inter_addr, inter_addr_end);
326 }
327 }
328
329 template <typename Func>
330 void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
331 Func&& func) {
332 const VAddr start_address = cpu_addr;
333 const VAddr end_address = start_address + size;
334 const IntervalType search_interval{start_address, end_address};
335 auto it = current_range.lower_bound(search_interval);
336 if (it == current_range.end()) {
337 return;
338 }
339 auto end_it = current_range.upper_bound(search_interval);
340 for (; it != end_it; it++) {
341 auto& inter = it->first;
342 VAddr inter_addr_end = inter.upper();
343 VAddr inter_addr = inter.lower();
344 if (inter_addr_end > end_address) {
345 inter_addr_end = end_address;
346 }
347 if (inter_addr < start_address) {
348 inter_addr = start_address;
349 }
350 func(inter_addr, inter_addr_end, it->second);
351 }
352 }
353
354 void RemoveEachInOverlapCounter(OverlapCounter& current_range,
355 const IntervalType search_interval, int subtract_value) {
356 bool any_removals = false;
357 current_range.add(std::make_pair(search_interval, subtract_value));
358 do {
359 any_removals = false;
360 auto it = current_range.lower_bound(search_interval);
361 if (it == current_range.end()) {
362 return;
363 }
364 auto end_it = current_range.upper_bound(search_interval);
365 for (; it != end_it; it++) {
366 if (it->second <= 0) {
367 any_removals = true;
368 current_range.erase(it);
369 break;
370 }
371 }
372 } while (any_removals);
373 }
374
375 static bool IsRangeGranular(VAddr cpu_addr, size_t size) {
376 return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) ==
377 ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK);
378 }
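IsRangeGranular holds when [cpu_addr, cpu_addr + size) stays inside a single guest page,
which is what lets ImmediateUploadMemory use GetPointer directly instead of a safe block
copy. Example arithmetic, assuming YUZU_PAGESIZE == 4096 (so YUZU_PAGEMASK == 4095):

    static_assert((0x1000ULL & ~4095ULL) == ((0x1000ULL + 0xFFFULL) & ~4095ULL));  // granular
    static_assert((0x1000ULL & ~4095ULL) != ((0x1000ULL + 0x1000ULL) & ~4095ULL)); // crosses a page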
379
380 void RunGarbageCollector();
381
382 void WaitOnAsyncFlushes(VAddr cpu_addr, u64 size);
383
384 void BindHostIndexBuffer();
385
386 void BindHostVertexBuffers();
387
388 void BindHostDrawIndirectBuffers();
389
390 void BindHostGraphicsUniformBuffers(size_t stage);
391
392 void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind);
393
394 void BindHostGraphicsStorageBuffers(size_t stage);
395
396 void BindHostGraphicsTextureBuffers(size_t stage);
397
398 void BindHostTransformFeedbackBuffers();
399
400 void BindHostComputeUniformBuffers();
401
402 void BindHostComputeStorageBuffers();
403
404 void BindHostComputeTextureBuffers();
405
406 void DoUpdateGraphicsBuffers(bool is_indexed);
407
408 void DoUpdateComputeBuffers();
409
410 void UpdateIndexBuffer();
411
412 void UpdateVertexBuffers();
413
414 void UpdateVertexBuffer(u32 index);
415
416 void UpdateDrawIndirect();
417
418 void UpdateUniformBuffers(size_t stage);
419
420 void UpdateStorageBuffers(size_t stage);
421
422 void UpdateTextureBuffers(size_t stage);
423
424 void UpdateTransformFeedbackBuffers();
425
426 void UpdateTransformFeedbackBuffer(u32 index);
427
428 void UpdateComputeUniformBuffers();
429
430 void UpdateComputeStorageBuffers();
431
432 void UpdateComputeTextureBuffers();
433
434 void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size);
435
436 [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size);
437
438 [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size);
439
440 void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);
441
442 [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size);
443
444 void Register(BufferId buffer_id);
445
446 void Unregister(BufferId buffer_id);
447
448 template <bool insert>
449 void ChangeRegister(BufferId buffer_id);
450
451 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
452
453 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
454
455 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
456
457 bool SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr, u32 size);
458
459 void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
460 std::span<BufferCopy> copies);
461
462 void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy,
463 std::span<const BufferCopy> copies);
464
465 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
466
467 void DownloadBufferMemory(Buffer& buffer_id);
468
469 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
470
471 void DeleteBuffer(BufferId buffer_id, bool do_not_mark = false);
472
473 void NotifyBufferDeletion();
474
475 [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index,
476 bool is_written) const;
477
478 [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size,
479 PixelFormat format);
480
481 [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size);
482
483 [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity);
484
485 [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
486
487 void ClearDownload(IntervalType subtract_interval);
488
489 VideoCore::RasterizerInterface& rasterizer;
490 Core::Memory::Memory& cpu_memory;
491
492 SlotVector<Buffer> slot_buffers;
493 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
494
495 const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
496
497 u32 last_index_count = 0;
498
499 Binding index_buffer;
500 std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers;
501 std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers;
502 std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers;
503 std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers;
504 std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers;
505 Binding count_buffer_binding;
506 Binding indirect_buffer_binding;
507
508 std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers;
509 std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers;
510 std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers;
511
512 std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{};
513 u32 enabled_compute_uniform_buffer_mask = 0;
514
515 const UniformBufferSizes* uniform_buffer_sizes{};
516 const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{};
517
518 std::array<u32, NUM_STAGES> enabled_storage_buffers{};
519 std::array<u32, NUM_STAGES> written_storage_buffers{};
520 u32 enabled_compute_storage_buffers = 0;
521 u32 written_compute_storage_buffers = 0;
522
523 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
524 std::array<u32, NUM_STAGES> written_texture_buffers{};
525 std::array<u32, NUM_STAGES> image_texture_buffers{};
526 u32 enabled_compute_texture_buffers = 0;
527 u32 written_compute_texture_buffers = 0;
528 u32 image_compute_texture_buffers = 0;
529
530 std::array<u32, 16> uniform_cache_hits{};
531 std::array<u32, 16> uniform_cache_shots{};
532
533 u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
534
535 bool has_deleted_buffers = false;
536
537 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
538 dirty_uniform_buffers{};
539 std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{};
540 std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS,
541 std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty>
542 uniform_buffer_binding_sizes{};
543
544 std::vector<BufferId> cached_write_buffer_ids;
545
546 MemoryTracker memory_tracker;
547 IntervalSet uncommitted_ranges;
548 IntervalSet common_ranges;
549 IntervalSet cached_ranges;
550 IntervalSet pending_ranges;
551 std::deque<IntervalSet> committed_ranges;
552
553 // Async Buffers
554 OverlapCounter async_downloads;
555 std::deque<std::optional<Async_Buffer>> async_buffers;
556 std::deque<boost::container::small_vector<BufferCopy, 4>> pending_downloads;
557 std::optional<Async_Buffer> current_buffer;
558
559 std::deque<Async_Buffer> async_buffers_death_ring;
560
561 size_t immediate_buffer_capacity = 0;
562 Common::ScratchBuffer<u8> immediate_buffer_alloc;
563
564 struct LRUItemParams {
565 using ObjectType = BufferId;
566 using TickType = u64;
567 };
568 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
569 u64 frame_tick = 0;
570 u64 total_used_memory = 0;
571 u64 minimum_memory = 0;
572 u64 critical_memory = 0;
573 BufferId inline_buffer_id;
574
575 bool active_async_buffers = false;
576
577 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
578};
579
580} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/memory_tracker_base.h b/src/video_core/buffer_cache/memory_tracker_base.h
new file mode 100644
index 000000000..dc4ebfcaa
--- /dev/null
+++ b/src/video_core/buffer_cache/memory_tracker_base.h
@@ -0,0 +1,273 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <deque>
9#include <limits>
10#include <type_traits>
11#include <unordered_set>
12#include <utility>
13
14#include "common/alignment.h"
15#include "common/common_types.h"
16#include "video_core/buffer_cache/word_manager.h"
17
18namespace VideoCommon {
19
20template <class RasterizerInterface>
21class MemoryTrackerBase {
22 static constexpr size_t MAX_CPU_PAGE_BITS = 39;
23 static constexpr size_t HIGHER_PAGE_BITS = 22;
24 static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
25 static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
26 static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
27 static constexpr size_t MANAGER_POOL_SIZE = 32;
28 static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
29 using Manager = WordManager<RasterizerInterface, WORDS_STACK_NEEDED>;
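Derived sizes under the constants above, assuming YUZU_PAGESIZE == 4096: each Manager
covers a 4 MiB slice of the address space, the 39-bit space needs 2^17 slice slots, and
one slice needs 4 MiB / 256 KiB = 16 tracking words on the stack:

    static_assert((1ULL << 22) == 4ULL * 1024 * 1024);          // HIGHER_PAGE_SIZE
    static_assert((1ULL << (39 - 22)) == 131072ULL);            // NUM_HIGH_PAGES
    static_assert((4ULL * 1024 * 1024) / (64ULL * 4096) == 16); // WORDS_STACK_NEEDED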
30
31public:
32 MemoryTrackerBase(RasterizerInterface& rasterizer_) : rasterizer{&rasterizer_} {}
33 ~MemoryTrackerBase() = default;
34
 35 /// Returns the inclusive CPU modified range as a begin/end pair
36 [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr,
37 u64 query_size) noexcept {
38 return IteratePairs<true>(
39 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
40 return manager->template ModifiedRegion<Type::CPU>(offset, size);
41 });
42 }
43
 44 /// Returns the inclusive GPU modified range as a begin/end pair
45 [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr,
46 u64 query_size) noexcept {
47 return IteratePairs<false>(
48 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
49 return manager->template ModifiedRegion<Type::GPU>(offset, size);
50 });
51 }
52
53 /// Returns true if a region has been modified from the CPU
54 [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
55 return IteratePages<true>(
56 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
57 return manager->template IsRegionModified<Type::CPU>(offset, size);
58 });
59 }
60
61 /// Returns true if a region has been modified from the GPU
62 [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
63 return IteratePages<false>(
64 query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
65 return manager->template IsRegionModified<Type::GPU>(offset, size);
66 });
67 }
68
69 /// Mark region as CPU modified, notifying the rasterizer about this change
70 void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
71 IteratePages<true>(dirty_cpu_addr, query_size,
72 [](Manager* manager, u64 offset, size_t size) {
73 manager->template ChangeRegionState<Type::CPU, true>(
74 manager->GetCpuAddr() + offset, size);
75 });
76 }
77
78 /// Unmark region as CPU modified, notifying the rasterizer about this change
79 void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
80 IteratePages<true>(dirty_cpu_addr, query_size,
81 [](Manager* manager, u64 offset, size_t size) {
82 manager->template ChangeRegionState<Type::CPU, false>(
83 manager->GetCpuAddr() + offset, size);
84 });
85 }
86
87 /// Mark region as modified from the host GPU
88 void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
89 IteratePages<true>(dirty_cpu_addr, query_size,
90 [](Manager* manager, u64 offset, size_t size) {
91 manager->template ChangeRegionState<Type::GPU, true>(
92 manager->GetCpuAddr() + offset, size);
93 });
94 }
95
96 /// Unmark region as modified from the host GPU
97 void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
98 IteratePages<true>(dirty_cpu_addr, query_size,
99 [](Manager* manager, u64 offset, size_t size) {
100 manager->template ChangeRegionState<Type::GPU, false>(
101 manager->GetCpuAddr() + offset, size);
102 });
103 }
104
 105 /// Mark region as modified from the CPU,
 106 /// but defer applying the change until FlushCachedWrites is called.
107 void CachedCpuWrite(VAddr dirty_cpu_addr, u64 query_size) {
108 IteratePages<true>(
109 dirty_cpu_addr, query_size, [this](Manager* manager, u64 offset, size_t size) {
110 const VAddr cpu_address = manager->GetCpuAddr() + offset;
111 manager->template ChangeRegionState<Type::CachedCPU, true>(cpu_address, size);
112 cached_pages.insert(static_cast<u32>(cpu_address >> HIGHER_PAGE_BITS));
113 });
114 }
115
 116 /// Flushes cached CPU writes and notifies the rasterizer about the deltas
117 void FlushCachedWrites(VAddr query_cpu_addr, u64 query_size) noexcept {
118 IteratePages<false>(query_cpu_addr, query_size,
119 [](Manager* manager, [[maybe_unused]] u64 offset,
120 [[maybe_unused]] size_t size) { manager->FlushCachedWrites(); });
121 }
122
123 void FlushCachedWrites() noexcept {
124 for (auto id : cached_pages) {
125 top_tier[id]->FlushCachedWrites();
126 }
127 cached_pages.clear();
128 }
129
130 /// Call 'func' for each CPU modified range and unmark those pages as CPU modified
131 template <typename Func>
132 void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
133 IteratePages<true>(query_cpu_range, query_size,
134 [&func](Manager* manager, u64 offset, size_t size) {
135 manager->template ForEachModifiedRange<Type::CPU, true>(
136 manager->GetCpuAddr() + offset, size, func);
137 });
138 }
139
140 /// Call 'func' for each GPU modified range and unmark those pages as GPU modified
141 template <typename Func>
142 void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, bool clear, Func&& func) {
143 IteratePages<false>(query_cpu_range, query_size,
144 [&func, clear](Manager* manager, u64 offset, size_t size) {
145 if (clear) {
146 manager->template ForEachModifiedRange<Type::GPU, true>(
147 manager->GetCpuAddr() + offset, size, func);
148 } else {
149 manager->template ForEachModifiedRange<Type::GPU, false>(
150 manager->GetCpuAddr() + offset, size, func);
151 }
152 });
153 }
154
155 template <typename Func>
156 void ForEachDownloadRangeAndClear(VAddr query_cpu_range, u64 query_size, Func&& func) {
157 IteratePages<false>(query_cpu_range, query_size,
158 [&func](Manager* manager, u64 offset, size_t size) {
159 manager->template ForEachModifiedRange<Type::GPU, true>(
160 manager->GetCpuAddr() + offset, size, func);
161 });
162 }
163
164private:
165 template <bool create_region_on_fail, typename Func>
166 bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
167 using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
168 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
169 std::size_t remaining_size{size};
170 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
171 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
172 while (remaining_size > 0) {
173 const std::size_t copy_amount{
174 std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
175 auto* manager{top_tier[page_index]};
176 if (manager) {
177 if constexpr (BOOL_BREAK) {
178 if (func(manager, page_offset, copy_amount)) {
179 return true;
180 }
181 } else {
182 func(manager, page_offset, copy_amount);
183 }
184 } else if constexpr (create_region_on_fail) {
185 CreateRegion(page_index);
186 manager = top_tier[page_index];
187 if constexpr (BOOL_BREAK) {
188 if (func(manager, page_offset, copy_amount)) {
189 return true;
190 }
191 } else {
192 func(manager, page_offset, copy_amount);
193 }
194 }
195 page_index++;
196 page_offset = 0;
197 remaining_size -= copy_amount;
198 }
199 return false;
200 }
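A worked split for the page walk above (addresses are example values): a query starting
0x1000 bytes before a 4 MiB boundary with size 0x2000 is dispatched as 0x1000 bytes at the
tail of one Manager, then 0x1000 bytes at offset 0 of the next:

    static_assert((0x3FF000ULL >> 22) == 0);               // first manager index
    static_assert(0x400000ULL - 0x3FF000ULL == 0x1000ULL); // copy_amount in the first slice
    static_assert((0x400000ULL >> 22) == 1);               // remainder goes to the next manager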
201
202 template <bool create_region_on_fail, typename Func>
203 std::pair<u64, u64> IteratePairs(VAddr cpu_address, size_t size, Func&& func) {
204 std::size_t remaining_size{size};
205 std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
206 u64 page_offset{cpu_address & HIGHER_PAGE_MASK};
207 u64 begin = std::numeric_limits<u64>::max();
208 u64 end = 0;
209 while (remaining_size > 0) {
210 const std::size_t copy_amount{
211 std::min<std::size_t>(HIGHER_PAGE_SIZE - page_offset, remaining_size)};
212 auto* manager{top_tier[page_index]};
213 const auto execute = [&] {
214 auto [new_begin, new_end] = func(manager, page_offset, copy_amount);
215 if (new_begin != 0 || new_end != 0) {
216 const u64 base_address = page_index << HIGHER_PAGE_BITS;
217 begin = std::min(new_begin + base_address, begin);
218 end = std::max(new_end + base_address, end);
219 }
220 };
221 if (manager) {
222 execute();
223 } else if constexpr (create_region_on_fail) {
224 CreateRegion(page_index);
225 manager = top_tier[page_index];
226 execute();
227 }
228 page_index++;
229 page_offset = 0;
230 remaining_size -= copy_amount;
231 }
232 if (begin < end) {
233 return std::make_pair(begin, end);
234 } else {
235 return std::make_pair(0ULL, 0ULL);
236 }
237 }
238
239 void CreateRegion(std::size_t page_index) {
240 const VAddr base_cpu_addr = page_index << HIGHER_PAGE_BITS;
241 top_tier[page_index] = GetNewManager(base_cpu_addr);
242 }
243
 244 Manager* GetNewManager(VAddr base_cpu_address) {
245 const auto on_return = [&] {
246 auto* new_manager = free_managers.front();
 247 new_manager->SetCpuAddress(base_cpu_address);
248 free_managers.pop_front();
249 return new_manager;
250 };
251 if (!free_managers.empty()) {
252 return on_return();
253 }
254 manager_pool.emplace_back();
255 auto& last_pool = manager_pool.back();
256 for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
257 new (&last_pool[i]) Manager(0, *rasterizer, HIGHER_PAGE_SIZE);
258 free_managers.push_back(&last_pool[i]);
259 }
260 return on_return();
261 }
262
263 std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
264 std::deque<Manager*> free_managers;
265
266 std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
267
268 std::unordered_set<u32> cached_pages;
269
270 RasterizerInterface* rasterizer = nullptr;
271};
272
273} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
new file mode 100644
index 000000000..a42455045
--- /dev/null
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -0,0 +1,462 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#pragma once
5
6#include <algorithm>
7#include <bit>
8#include <limits>
9#include <span>
10#include <utility>
11
12#include "common/alignment.h"
13#include "common/common_funcs.h"
14#include "common/common_types.h"
15#include "common/div_ceil.h"
16#include "core/memory.h"
17
18namespace VideoCommon {
19
20constexpr u64 PAGES_PER_WORD = 64;
21constexpr u64 BYTES_PER_PAGE = Core::Memory::YUZU_PAGESIZE;
22constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
23
24enum class Type {
25 CPU,
26 GPU,
27 CachedCPU,
28 Untracked,
29};
30
31/// Vector tracking modified pages, tightly packed, with a small-vector optimization
32template <size_t stack_words = 1>
33struct WordsArray {
34 /// Returns the pointer to the words state
35 [[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
36 return is_short ? stack.data() : heap;
37 }
38
39 /// Returns the pointer to the words state
40 [[nodiscard]] u64* Pointer(bool is_short) noexcept {
41 return is_short ? stack.data() : heap;
42 }
43
 44 std::array<u64, stack_words> stack{}; ///< Storage for small buffers
 45 u64* heap; ///< Pointer to the heap storage of buffers that do not fit on the stack
46};
47
48template <size_t stack_words = 1>
49struct Words {
50 explicit Words() = default;
51 explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
52 num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
53 if (IsShort()) {
54 cpu.stack.fill(~u64{0});
55 gpu.stack.fill(0);
56 cached_cpu.stack.fill(0);
57 untracked.stack.fill(~u64{0});
58 } else {
 59 // Share a single allocation between all the word arrays and set their default values
60 u64* const alloc = new u64[num_words * 4];
61 cpu.heap = alloc;
62 gpu.heap = alloc + num_words;
63 cached_cpu.heap = alloc + num_words * 2;
64 untracked.heap = alloc + num_words * 3;
65 std::fill_n(cpu.heap, num_words, ~u64{0});
66 std::fill_n(gpu.heap, num_words, 0);
67 std::fill_n(cached_cpu.heap, num_words, 0);
68 std::fill_n(untracked.heap, num_words, ~u64{0});
69 }
 70 // Clean up trailing bits
71 const u64 last_word_size = size_bytes % BYTES_PER_WORD;
72 const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
73 const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
74 const u64 last_word = (~u64{0} << shift) >> shift;
75 cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
76 untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
77 }
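Tail-masking example for the constructor above, assuming 4 KiB pages: a 10-page buffer
(40960 bytes) gives shift = (64 - 10) % 64 = 54, so last_word keeps only the low 10 bits
set and pages past the end of the buffer can never be reported as modified:

    static_assert(((~0ULL << 54) >> 54) == 0x3FFULL); // exactly 10 low bits survive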
78
79 ~Words() {
80 Release();
81 }
82
83 Words& operator=(Words&& rhs) noexcept {
84 Release();
85 size_bytes = rhs.size_bytes;
86 num_words = rhs.num_words;
87 cpu = rhs.cpu;
88 gpu = rhs.gpu;
89 cached_cpu = rhs.cached_cpu;
90 untracked = rhs.untracked;
91 rhs.cpu.heap = nullptr;
92 return *this;
93 }
94
95 Words(Words&& rhs) noexcept
96 : size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
97 cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} {
98 rhs.cpu.heap = nullptr;
99 }
100
101 Words& operator=(const Words&) = delete;
102 Words(const Words&) = delete;
103
104 /// Returns true when the buffer fits in the small vector optimization
105 [[nodiscard]] bool IsShort() const noexcept {
106 return num_words <= stack_words;
107 }
108
109 /// Returns the number of words of the buffer
110 [[nodiscard]] size_t NumWords() const noexcept {
111 return num_words;
112 }
113
114 /// Release buffer resources
115 void Release() {
116 if (!IsShort()) {
 117 // The CPU word array is the base of the shared heap allocation
118 delete[] cpu.heap;
119 }
120 }
121
122 template <Type type>
123 std::span<u64> Span() noexcept {
124 if constexpr (type == Type::CPU) {
125 return std::span<u64>(cpu.Pointer(IsShort()), num_words);
126 } else if constexpr (type == Type::GPU) {
127 return std::span<u64>(gpu.Pointer(IsShort()), num_words);
128 } else if constexpr (type == Type::CachedCPU) {
129 return std::span<u64>(cached_cpu.Pointer(IsShort()), num_words);
130 } else if constexpr (type == Type::Untracked) {
131 return std::span<u64>(untracked.Pointer(IsShort()), num_words);
132 }
133 }
134
135 template <Type type>
136 std::span<const u64> Span() const noexcept {
137 if constexpr (type == Type::CPU) {
138 return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
139 } else if constexpr (type == Type::GPU) {
140 return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
141 } else if constexpr (type == Type::CachedCPU) {
142 return std::span<const u64>(cached_cpu.Pointer(IsShort()), num_words);
143 } else if constexpr (type == Type::Untracked) {
144 return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
145 }
146 }
147
148 u64 size_bytes = 0;
149 size_t num_words = 0;
150 WordsArray<stack_words> cpu;
151 WordsArray<stack_words> gpu;
152 WordsArray<stack_words> cached_cpu;
153 WordsArray<stack_words> untracked;
154};
155
156template <class RasterizerInterface, size_t stack_words = 1>
157class WordManager {
158public:
159 explicit WordManager(VAddr cpu_addr_, RasterizerInterface& rasterizer_, u64 size_bytes)
160 : cpu_addr{cpu_addr_}, rasterizer{&rasterizer_}, words{size_bytes} {}
161
162 explicit WordManager() = default;
163
164 void SetCpuAddress(VAddr new_cpu_addr) {
165 cpu_addr = new_cpu_addr;
166 }
167
168 VAddr GetCpuAddr() const {
169 return cpu_addr;
170 }
171
172 static u64 ExtractBits(u64 word, size_t page_start, size_t page_end) {
173 constexpr size_t number_bits = sizeof(u64) * 8;
174 const size_t limit_page_end = number_bits - std::min(page_end, number_bits);
175 u64 bits = (word >> page_start) << page_start;
176 bits = (bits << limit_page_end) >> limit_page_end;
177 return bits;
178 }
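ExtractBits keeps only the bit range [page_start, page_end) of a word; for an all-ones
input and the range [2, 5) that is 0b11100. The same shifts, restated as one expression
(59 == 64 - page_end):

    static_assert(((((~0ULL >> 2) << 2) << 59) >> 59) == 0b11100ULL);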
179
180 static std::pair<size_t, size_t> GetWordPage(VAddr address) {
181 const size_t converted_address = static_cast<size_t>(address);
182 const size_t word_number = converted_address / BYTES_PER_WORD;
183 const size_t amount_pages = converted_address % BYTES_PER_WORD;
184 return std::make_pair(word_number, amount_pages / BYTES_PER_PAGE);
185 }
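A worked decomposition for GetWordPage, assuming 4 KiB pages (so BYTES_PER_WORD ==
0x40000): buffer offset 0x41000 lands in tracking word 1, page 1 of that word:

    static_assert(0x41000ULL / 0x40000ULL == 1);               // word_number
    static_assert((0x41000ULL % 0x40000ULL) / 0x1000ULL == 1); // page inside the word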
186
187 template <typename Func>
188 void IterateWords(size_t offset, size_t size, Func&& func) const {
189 using FuncReturn = std::invoke_result_t<Func, std::size_t, u64>;
190 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
191 const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
192 const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
193 if (start >= SizeBytes() || end <= start) {
194 return;
195 }
196 auto [start_word, start_page] = GetWordPage(start);
197 auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
198 const size_t num_words = NumWords();
199 start_word = std::min(start_word, num_words);
200 end_word = std::min(end_word, num_words);
201 const size_t diff = end_word - start_word;
202 end_word += (end_page + PAGES_PER_WORD - 1ULL) / PAGES_PER_WORD;
203 end_word = std::min(end_word, num_words);
204 end_page += diff * PAGES_PER_WORD;
205 constexpr u64 base_mask{~0ULL};
206 for (size_t word_index = start_word; word_index < end_word; word_index++) {
207 const u64 mask = ExtractBits(base_mask, start_page, end_page);
208 start_page = 0;
209 end_page -= PAGES_PER_WORD;
210 if constexpr (BOOL_BREAK) {
211 if (func(word_index, mask)) {
212 return;
213 }
214 } else {
215 func(word_index, mask);
216 }
217 }
218 }
219
220 template <typename Func>
221 void IteratePages(u64 mask, Func&& func) const {
222 size_t offset = 0;
223 while (mask != 0) {
224 const size_t empty_bits = std::countr_zero(mask);
225 offset += empty_bits;
226 mask = mask >> empty_bits;
227
228 const size_t continuous_bits = std::countr_one(mask);
229 func(offset, continuous_bits);
230 mask = continuous_bits < PAGES_PER_WORD ? (mask >> continuous_bits) : 0;
231 offset += continuous_bits;
232 }
233 }
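The loop above decodes a page bitmask into (offset, length) runs using countr_zero and
countr_one; for mask 0b0110'0100 it reports the run (2, 1) and then (5, 2). A standalone
sketch of the same walk:

    #include <bit>
    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    int main() {
        std::vector<std::pair<std::size_t, std::size_t>> runs;
        std::uint64_t mask = 0b0110'0100;
        std::size_t offset = 0;
        while (mask != 0) {
            const std::size_t zeros = static_cast<std::size_t>(std::countr_zero(mask));
            offset += zeros; // skip the gap of clear pages
            mask >>= zeros;
            const std::size_t ones = static_cast<std::size_t>(std::countr_one(mask));
            runs.emplace_back(offset, ones); // stands in for func(offset, ones)
            mask = ones < 64 ? (mask >> ones) : 0;
            offset += ones;
        }
        assert((runs == std::vector<std::pair<std::size_t, std::size_t>>{{2, 1}, {5, 2}}));
    }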
234
235 /**
236 * Change the state of a range of pages
237 *
238 * @param dirty_addr Base address to mark or unmark as modified
239 * @param size Size in bytes to mark or unmark as modified
240 */
241 template <Type type, bool enable>
242 void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
243 std::span<u64> state_words = words.template Span<type>();
244 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
245 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
246 IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
247 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
248 NotifyRasterizer<!enable>(index, untracked_words[index], mask);
249 }
250 if constexpr (enable) {
251 state_words[index] |= mask;
252 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
253 untracked_words[index] |= mask;
254 }
255 if constexpr (type == Type::CPU) {
256 cached_words[index] &= ~mask;
257 }
258 } else {
259 if constexpr (type == Type::CPU) {
260 const u64 word = state_words[index] & mask;
261 cached_words[index] &= ~word;
262 }
263 state_words[index] &= ~mask;
264 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
265 untracked_words[index] &= ~mask;
266 }
267 }
268 });
269 }
270
271 /**
272 * Loop over each page in the given range, turn off those bits and notify the rasterizer if
273 * needed. Call the given function on each turned off range.
274 *
275 * @param query_cpu_range Base CPU address to loop over
276 * @param size Size in bytes of the CPU range to loop over
277 * @param func Function to call for each turned off region
278 */
279 template <Type type, bool clear, typename Func>
280 void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
281 static_assert(type != Type::Untracked);
282
283 std::span<u64> state_words = words.template Span<type>();
284 [[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
285 [[maybe_unused]] std::span<u64> cached_words = words.template Span<Type::CachedCPU>();
286 const size_t offset = query_cpu_range - cpu_addr;
287 bool pending = false;
288 size_t pending_offset{};
289 size_t pending_pointer{};
290 const auto release = [&]() {
291 func(cpu_addr + pending_offset * BYTES_PER_PAGE,
292 (pending_pointer - pending_offset) * BYTES_PER_PAGE);
293 };
294 IterateWords(offset, size, [&](size_t index, u64 mask) {
295 const u64 word = state_words[index] & mask;
296 if constexpr (clear) {
297 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
298 NotifyRasterizer<true>(index, untracked_words[index], mask);
299 }
300 state_words[index] &= ~mask;
301 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
302 untracked_words[index] &= ~mask;
303 }
304 if constexpr (type == Type::CPU) {
305 cached_words[index] &= ~word;
306 }
307 }
308 const size_t base_offset = index * PAGES_PER_WORD;
309 IteratePages(word, [&](size_t pages_offset, size_t pages_size) {
310 const auto reset = [&]() {
311 pending_offset = base_offset + pages_offset;
312 pending_pointer = base_offset + pages_offset + pages_size;
313 };
314 if (!pending) {
315 reset();
316 pending = true;
317 return;
318 }
319 if (pending_pointer == base_offset + pages_offset) {
320 pending_pointer += pages_size;
321 return;
322 }
323 release();
324 reset();
325 });
326 });
327 if (pending) {
328 release();
329 }
330 }
331
332 /**
333 * Returns true when a region has been modified
334 *
335 * @param offset Offset in bytes from the start of the buffer
336 * @param size Size in bytes of the region to query for modifications
337 */
338 template <Type type>
339 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
340 static_assert(type != Type::Untracked);
341
342 const std::span<const u64> state_words = words.template Span<type>();
343 bool result = false;
344 IterateWords(offset, size, [&](size_t index, u64 mask) {
345 const u64 word = state_words[index] & mask;
346 if (word != 0) {
347 result = true;
348 return true;
349 }
350 return false;
351 });
352 return result;
353 }
354
355 /**
356 * Returns a begin end pair with the inclusive modified region
357 *
358 * @param offset Offset in bytes from the start of the buffer
359 * @param size Size in bytes of the region to query for modifications
360 */
361 template <Type type>
362 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
363 static_assert(type != Type::Untracked);
364 const std::span<const u64> state_words = words.template Span<type>();
365 u64 begin = std::numeric_limits<u64>::max();
366 u64 end = 0;
367 IterateWords(offset, size, [&](size_t index, u64 mask) {
368 const u64 word = state_words[index] & mask;
369 if (word == 0) {
370 return;
371 }
372 const u64 local_page_begin = std::countr_zero(word);
373 const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word);
374 const u64 page_index = index * PAGES_PER_WORD;
375 begin = std::min(begin, page_index + local_page_begin);
376 end = page_index + local_page_end;
377 });
378 static constexpr std::pair<u64, u64> EMPTY{0, 0};
379 return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY;
380 }
381
382 /// Returns the number of words of the manager
383 [[nodiscard]] size_t NumWords() const noexcept {
384 return words.NumWords();
385 }
386
387 /// Returns the size in bytes of the manager
388 [[nodiscard]] u64 SizeBytes() const noexcept {
389 return words.size_bytes;
390 }
391
392 /// Returns true when the buffer fits in the small vector optimization
393 [[nodiscard]] bool IsShort() const noexcept {
394 return words.IsShort();
395 }
396
397 void FlushCachedWrites() noexcept {
398 const u64 num_words = NumWords();
399 u64* const cached_words = Array<Type::CachedCPU>();
400 u64* const untracked_words = Array<Type::Untracked>();
401 u64* const cpu_words = Array<Type::CPU>();
402 for (u64 word_index = 0; word_index < num_words; ++word_index) {
403 const u64 cached_bits = cached_words[word_index];
404 NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits);
405 untracked_words[word_index] |= cached_bits;
406 cpu_words[word_index] |= cached_bits;
407 cached_words[word_index] = 0;
408 }
409 }
410
411private:
412 template <Type type>
413 u64* Array() noexcept {
414 if constexpr (type == Type::CPU) {
415 return words.cpu.Pointer(IsShort());
416 } else if constexpr (type == Type::GPU) {
417 return words.gpu.Pointer(IsShort());
418 } else if constexpr (type == Type::CachedCPU) {
419 return words.cached_cpu.Pointer(IsShort());
420 } else if constexpr (type == Type::Untracked) {
421 return words.untracked.Pointer(IsShort());
422 }
423 }
424
425 template <Type type>
426 const u64* Array() const noexcept {
427 if constexpr (type == Type::CPU) {
428 return words.cpu.Pointer(IsShort());
429 } else if constexpr (type == Type::GPU) {
430 return words.gpu.Pointer(IsShort());
431 } else if constexpr (type == Type::CachedCPU) {
432 return words.cached_cpu.Pointer(IsShort());
433 } else if constexpr (type == Type::Untracked) {
434 return words.untracked.Pointer(IsShort());
435 }
436 }
437
438 /**
439 * Notify rasterizer about changes in the CPU tracking state of a word in the buffer
440 *
441 * @param word_index Index to the word to notify to the rasterizer
442 * @param current_bits Current state of the word
443 * @param new_bits New state of the word
444 *
445 * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages
446 */
447 template <bool add_to_rasterizer>
448 void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const {
449 u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits;
450 VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
451 IteratePages(changed_bits, [&](size_t offset, size_t size) {
452 rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
453 size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1);
454 });
455 }
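The changed-bit selection above only notifies pages whose tracking state actually flips:
with current = 0b1010 and new_bits = 0b1100 (example values), adding touches only 0b1000
and removing touches only 0b0100:

    static_assert((0b1010u & 0b1100u) == 0b1000u);  // add_to_rasterizer == true
    static_assert((~0b1010u & 0b1100u) == 0b0100u); // add_to_rasterizer == false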
456
457 VAddr cpu_addr = 0;
458 RasterizerInterface* rasterizer = nullptr;
459 Words<stack_words> words;
460};
461
462} // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a8c3f8b67..18d3c3ac0 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -8,6 +8,7 @@
 
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/buffer_cache/memory_tracker_base.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -200,6 +201,8 @@ private:
 struct BufferCacheParams {
     using Runtime = OpenGL::BufferCacheRuntime;
     using Buffer = OpenGL::Buffer;
+    using Async_Buffer = u32;
+    using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
 
     static constexpr bool IS_OPENGL = true;
     static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true;
@@ -208,6 +211,7 @@ struct BufferCacheParams {
     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
     static constexpr bool USE_MEMORY_MAPS = false;
     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
+    static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
 };
 
 using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
new file mode 100644
index 000000000..f15ae8e25
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_opengl/gl_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<OpenGL::BufferCacheParams>;
9}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 2a8d9e377..908625c66 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -93,8 +93,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
93 state_tracker(), scheduler(device, state_tracker), 93 state_tracker(), scheduler(device, state_tracker),
94 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, 94 swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
95 render_window.GetFramebufferLayout().height, false), 95 render_window.GetFramebufferLayout().height, false),
96 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, 96 present_manager(render_window, device, memory_allocator, scheduler, swapchain),
97 screen_info), 97 blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, present_manager,
98 scheduler, screen_info),
98 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 99 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
99 state_tracker, scheduler) { 100 state_tracker, scheduler) {
100 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { 101 if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
@@ -121,46 +122,19 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
121 return; 122 return;
122 } 123 }
123 // Update screen info if the framebuffer size has changed. 124 // Update screen info if the framebuffer size has changed.
124 if (screen_info.width != framebuffer->width || screen_info.height != framebuffer->height) { 125 screen_info.width = framebuffer->width;
125 screen_info.width = framebuffer->width; 126 screen_info.height = framebuffer->height;
126 screen_info.height = framebuffer->height; 127
127 }
128 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; 128 const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset;
129 const bool use_accelerated = 129 const bool use_accelerated =
130 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); 130 rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
131 const bool is_srgb = use_accelerated && screen_info.is_srgb; 131 const bool is_srgb = use_accelerated && screen_info.is_srgb;
132 RenderScreenshot(*framebuffer, use_accelerated); 132 RenderScreenshot(*framebuffer, use_accelerated);
133 133
134 bool has_been_recreated = false; 134 Frame* frame = present_manager.GetRenderFrame();
135 const auto recreate_swapchain = [&](u32 width, u32 height) { 135 blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb);
136 if (!has_been_recreated) { 136 scheduler.Flush(*frame->render_ready);
137 has_been_recreated = true; 137 present_manager.Present(frame);
138 scheduler.Finish();
139 }
140 swapchain.Create(width, height, is_srgb);
141 };
142
143 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
144 if (swapchain.NeedsRecreation(is_srgb) || swapchain.GetWidth() != layout.width ||
145 swapchain.GetHeight() != layout.height) {
146 recreate_swapchain(layout.width, layout.height);
147 }
148 bool is_outdated;
149 do {
150 swapchain.AcquireNextImage();
151 is_outdated = swapchain.IsOutDated();
152 if (is_outdated) {
153 recreate_swapchain(layout.width, layout.height);
154 }
155 } while (is_outdated);
156 if (has_been_recreated) {
157 blit_screen.Recreate();
158 }
159 const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
160 const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
161 scheduler.Flush(render_semaphore, present_semaphore);
162 scheduler.WaitWorker();
163 swapchain.Present(render_semaphore);
164 138
165 gpu.RendererFrameEndNotify(); 139 gpu.RendererFrameEndNotify();
166 rasterizer.TickFrame(); 140 rasterizer.TickFrame();
@@ -246,8 +220,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
246 }); 220 });
247 const VkExtent2D render_area{.width = layout.width, .height = layout.height}; 221 const VkExtent2D render_area{.width = layout.width, .height = layout.height};
248 const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); 222 const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
249 // Since we're not rendering to the screen, ignore the render semaphore. 223 blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated);
250 void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
251 224
252 const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); 225 const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
253 const VkBufferCreateInfo dst_buffer_info{ 226 const VkBufferCreateInfo dst_buffer_info{
@@ -270,7 +243,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
270 .pNext = nullptr, 243 .pNext = nullptr,
271 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, 244 .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
272 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, 245 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
273 .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 246 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
274 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 247 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
275 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 248 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
276 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 249 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
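The rewritten SwapBuffers above decouples rendering from presentation: the render thread acquires a free Frame, records the blit into it, flushes with the frame's render_ready semaphore, and hands the frame off to the present manager. A minimal sketch of that frame-recycling handoff, with simplified, illustrative names (Frame here only stands in for the real struct):

    #include <condition_variable>
    #include <mutex>
    #include <queue>

    struct Frame;  // owns the per-frame image, framebuffer, semaphore and fence

    class FrameRecycler {
    public:
        // Render thread: block until a presented frame becomes free again.
        Frame* Acquire() {
            std::unique_lock lock{mutex};
            cv.wait(lock, [this] { return !free.empty(); });
            Frame* frame = free.front();
            free.pop();
            return frame;
        }
        // Present thread: return a frame once the GPU has finished presenting it.
        void Release(Frame* frame) {
            std::scoped_lock lock{mutex};
            free.push(frame);
            cv.notify_one();
        }
    private:
        std::queue<Frame*> free;
        std::mutex mutex;
        std::condition_variable cv;
    };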
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 009e75e0d..f44367cb2 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -9,6 +9,7 @@
9#include "common/dynamic_library.h" 9#include "common/dynamic_library.h"
10#include "video_core/renderer_base.h" 10#include "video_core/renderer_base.h"
11#include "video_core/renderer_vulkan/vk_blit_screen.h" 11#include "video_core/renderer_vulkan/vk_blit_screen.h"
12#include "video_core/renderer_vulkan/vk_present_manager.h"
12#include "video_core/renderer_vulkan/vk_rasterizer.h" 13#include "video_core/renderer_vulkan/vk_rasterizer.h"
13#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 15#include "video_core/renderer_vulkan/vk_state_tracker.h"
@@ -76,6 +77,7 @@ private:
76 StateTracker state_tracker; 77 StateTracker state_tracker;
77 Scheduler scheduler; 78 Scheduler scheduler;
78 Swapchain swapchain; 79 Swapchain swapchain;
80 PresentManager present_manager;
79 BlitScreen blit_screen; 81 BlitScreen blit_screen;
80 RasterizerVulkan rasterizer; 82 RasterizerVulkan rasterizer;
81 std::optional<TurboMode> turbo_mode; 83 std::optional<TurboMode> turbo_mode;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 2f0cc27e8..1e0fdd3d9 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -122,10 +122,12 @@ struct BlitScreen::BufferData {
122 122
123BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_, 123BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWindow& render_window_,
124 const Device& device_, MemoryAllocator& memory_allocator_, 124 const Device& device_, MemoryAllocator& memory_allocator_,
125 Swapchain& swapchain_, Scheduler& scheduler_, const ScreenInfo& screen_info_) 125 Swapchain& swapchain_, PresentManager& present_manager_,
126 Scheduler& scheduler_, const ScreenInfo& screen_info_)
126 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, 127 : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_},
127 memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, 128 memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_},
128 image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { 129 scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_},
130 current_srgb{swapchain.IsSrgb()}, image_view_format{swapchain.GetImageViewFormat()} {
129 resource_ticks.resize(image_count); 131 resource_ticks.resize(image_count);
130 132
131 CreateStaticResources(); 133 CreateStaticResources();
@@ -135,25 +137,20 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin
135BlitScreen::~BlitScreen() = default; 137BlitScreen::~BlitScreen() = default;
136 138
137void BlitScreen::Recreate() { 139void BlitScreen::Recreate() {
140 present_manager.WaitPresent();
141 scheduler.Finish();
142 device.GetLogical().WaitIdle();
138 CreateDynamicResources(); 143 CreateDynamicResources();
139} 144}
140 145
141VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, 146void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
142 const VkFramebuffer& host_framebuffer, 147 const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout,
143 const Layout::FramebufferLayout layout, VkExtent2D render_area, 148 VkExtent2D render_area, bool use_accelerated) {
144 bool use_accelerated) {
145 RefreshResources(framebuffer); 149 RefreshResources(framebuffer);
146 150
147 // Finish any pending renderpass 151 // Finish any pending renderpass
148 scheduler.RequestOutsideRenderPassOperationContext(); 152 scheduler.RequestOutsideRenderPassOperationContext();
149 153
150 if (const auto swapchain_images = swapchain.GetImageCount(); swapchain_images != image_count) {
151 image_count = swapchain_images;
152 Recreate();
153 }
154
155 const std::size_t image_index = swapchain.GetImageIndex();
156
157 scheduler.Wait(resource_ticks[image_index]); 154 scheduler.Wait(resource_ticks[image_index]);
158 resource_ticks[image_index] = scheduler.CurrentTick(); 155 resource_ticks[image_index] = scheduler.CurrentTick();
159 156
@@ -169,7 +166,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
169 std::memcpy(mapped_span.data(), &data, sizeof(data)); 166 std::memcpy(mapped_span.data(), &data, sizeof(data));
170 167
171 if (!use_accelerated) { 168 if (!use_accelerated) {
172 const u64 image_offset = GetRawImageOffset(framebuffer, image_index); 169 const u64 image_offset = GetRawImageOffset(framebuffer);
173 170
174 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 171 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
175 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 172 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
@@ -204,8 +201,8 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
204 .depth = 1, 201 .depth = 1,
205 }, 202 },
206 }; 203 };
207 scheduler.Record([this, copy, image_index](vk::CommandBuffer cmdbuf) { 204 scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) {
208 const VkImage image = *raw_images[image_index]; 205 const VkImage image = *raw_images[index];
209 const VkImageMemoryBarrier base_barrier{ 206 const VkImageMemoryBarrier base_barrier{
210 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 207 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
211 .pNext = nullptr, 208 .pNext = nullptr,
@@ -245,14 +242,15 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
245 242
246 const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); 243 const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue();
247 if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { 244 if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) {
248 UpdateAADescriptorSet(image_index, source_image_view, false); 245 UpdateAADescriptorSet(source_image_view, false);
249 const u32 up_scale = Settings::values.resolution_info.up_scale; 246 const u32 up_scale = Settings::values.resolution_info.up_scale;
250 const u32 down_shift = Settings::values.resolution_info.down_shift; 247 const u32 down_shift = Settings::values.resolution_info.down_shift;
251 VkExtent2D size{ 248 VkExtent2D size{
252 .width = (up_scale * framebuffer.width) >> down_shift, 249 .width = (up_scale * framebuffer.width) >> down_shift,
253 .height = (up_scale * framebuffer.height) >> down_shift, 250 .height = (up_scale * framebuffer.height) >> down_shift,
254 }; 251 };
255 scheduler.Record([this, image_index, size, anti_alias_pass](vk::CommandBuffer cmdbuf) { 252 scheduler.Record([this, index = image_index, size,
253 anti_alias_pass](vk::CommandBuffer cmdbuf) {
256 const VkImageMemoryBarrier base_barrier{ 254 const VkImageMemoryBarrier base_barrier{
257 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, 255 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
258 .pNext = nullptr, 256 .pNext = nullptr,
@@ -326,7 +324,7 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
326 324
327 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); 325 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
328 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, 326 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0,
329 aa_descriptor_sets[image_index], {}); 327 aa_descriptor_sets[index], {});
330 cmdbuf.Draw(4, 1, 0, 0); 328 cmdbuf.Draw(4, 1, 0, 0);
331 cmdbuf.EndRenderPass(); 329 cmdbuf.EndRenderPass();
332 330
@@ -369,81 +367,99 @@ VkSemaphore BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
369 }; 367 };
370 VkImageView fsr_image_view = 368 VkImageView fsr_image_view =
371 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); 369 fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect);
372 UpdateDescriptorSet(image_index, fsr_image_view, true); 370 UpdateDescriptorSet(fsr_image_view, true);
373 } else { 371 } else {
374 const bool is_nn = 372 const bool is_nn =
375 Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; 373 Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor;
376 UpdateDescriptorSet(image_index, source_image_view, is_nn); 374 UpdateDescriptorSet(source_image_view, is_nn);
377 } 375 }
378 376
379 scheduler.Record( 377 scheduler.Record([this, host_framebuffer, index = image_index,
380 [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) { 378 size = render_area](vk::CommandBuffer cmdbuf) {
381 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; 379 const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
382 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; 380 const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
383 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; 381 const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
384 const VkClearValue clear_color{ 382 const VkClearValue clear_color{
385 .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, 383 .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
386 }; 384 };
387 const VkRenderPassBeginInfo renderpass_bi{ 385 const VkRenderPassBeginInfo renderpass_bi{
388 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 386 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
389 .pNext = nullptr, 387 .pNext = nullptr,
390 .renderPass = *renderpass, 388 .renderPass = *renderpass,
391 .framebuffer = host_framebuffer, 389 .framebuffer = host_framebuffer,
392 .renderArea = 390 .renderArea =
393 { 391 {
394 .offset = {0, 0}, 392 .offset = {0, 0},
395 .extent = size, 393 .extent = size,
396 }, 394 },
397 .clearValueCount = 1, 395 .clearValueCount = 1,
398 .pClearValues = &clear_color, 396 .pClearValues = &clear_color,
399 }; 397 };
400 const VkViewport viewport{ 398 const VkViewport viewport{
401 .x = 0.0f, 399 .x = 0.0f,
402 .y = 0.0f, 400 .y = 0.0f,
403 .width = static_cast<float>(size.width), 401 .width = static_cast<float>(size.width),
404 .height = static_cast<float>(size.height), 402 .height = static_cast<float>(size.height),
405 .minDepth = 0.0f, 403 .minDepth = 0.0f,
406 .maxDepth = 1.0f, 404 .maxDepth = 1.0f,
407 }; 405 };
408 const VkRect2D scissor{ 406 const VkRect2D scissor{
409 .offset = {0, 0}, 407 .offset = {0, 0},
410 .extent = size, 408 .extent = size,
411 }; 409 };
412 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); 410 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
413 auto graphics_pipeline = [this]() { 411 auto graphics_pipeline = [this]() {
414 switch (Settings::values.scaling_filter.GetValue()) { 412 switch (Settings::values.scaling_filter.GetValue()) {
415 case Settings::ScalingFilter::NearestNeighbor: 413 case Settings::ScalingFilter::NearestNeighbor:
416 case Settings::ScalingFilter::Bilinear: 414 case Settings::ScalingFilter::Bilinear:
417 return *bilinear_pipeline; 415 return *bilinear_pipeline;
418 case Settings::ScalingFilter::Bicubic: 416 case Settings::ScalingFilter::Bicubic:
419 return *bicubic_pipeline; 417 return *bicubic_pipeline;
420 case Settings::ScalingFilter::Gaussian: 418 case Settings::ScalingFilter::Gaussian:
421 return *gaussian_pipeline; 419 return *gaussian_pipeline;
422 case Settings::ScalingFilter::ScaleForce: 420 case Settings::ScalingFilter::ScaleForce:
423 return *scaleforce_pipeline; 421 return *scaleforce_pipeline;
424 default: 422 default:
425 return *bilinear_pipeline; 423 return *bilinear_pipeline;
426 } 424 }
427 }(); 425 }();
428 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); 426 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline);
429 cmdbuf.SetViewport(0, viewport); 427 cmdbuf.SetViewport(0, viewport);
430 cmdbuf.SetScissor(0, scissor); 428 cmdbuf.SetScissor(0, scissor);
431 429
432 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); 430 cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
433 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, 431 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
434 descriptor_sets[image_index], {}); 432 descriptor_sets[index], {});
435 cmdbuf.Draw(4, 1, 0, 0); 433 cmdbuf.Draw(4, 1, 0, 0);
436 cmdbuf.EndRenderPass(); 434 cmdbuf.EndRenderPass();
437 }); 435 });
438 return *semaphores[image_index];
439} 436}
440 437
441VkSemaphore BlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, 438void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
442 bool use_accelerated) { 439 bool use_accelerated, bool is_srgb) {
443 const std::size_t image_index = swapchain.GetImageIndex(); 440 // Recreate dynamic resources if the image count or colorspace changed
444 const VkExtent2D render_area = swapchain.GetSize(); 441 if (const std::size_t swapchain_images = swapchain.GetImageCount();
442 swapchain_images != image_count || current_srgb != is_srgb) {
443 current_srgb = is_srgb;
444 image_view_format = current_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
445 image_count = swapchain_images;
446 Recreate();
447 }
448
449 // Recreate the presentation frame if the dimensions of the window changed
445 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); 450 const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
446 return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated); 451 if (layout.width != frame->width || layout.height != frame->height ||
452 is_srgb != frame->is_srgb) {
453 Recreate();
454 present_manager.RecreateFrame(frame, layout.width, layout.height, is_srgb,
455 image_view_format, *renderpass);
456 }
457
458 const VkExtent2D render_area{frame->width, frame->height};
459 Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated);
460 if (++image_index >= image_count) {
461 image_index = 0;
462 }
447} 463}
448 464
449vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { 465vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
@@ -471,13 +487,11 @@ void BlitScreen::CreateStaticResources() {
471} 487}
472 488
473void BlitScreen::CreateDynamicResources() { 489void BlitScreen::CreateDynamicResources() {
474 CreateSemaphores();
475 CreateDescriptorPool(); 490 CreateDescriptorPool();
476 CreateDescriptorSetLayout(); 491 CreateDescriptorSetLayout();
477 CreateDescriptorSets(); 492 CreateDescriptorSets();
478 CreatePipelineLayout(); 493 CreatePipelineLayout();
479 CreateRenderPass(); 494 CreateRenderPass();
480 CreateFramebuffers();
481 CreateGraphicsPipeline(); 495 CreateGraphicsPipeline();
482 fsr.reset(); 496 fsr.reset();
483 smaa.reset(); 497 smaa.reset();
@@ -525,11 +539,6 @@ void BlitScreen::CreateShaders() {
525 } 539 }
526} 540}
527 541
528void BlitScreen::CreateSemaphores() {
529 semaphores.resize(image_count);
530 std::ranges::generate(semaphores, [this] { return device.GetLogical().CreateSemaphore(); });
531}
532
533void BlitScreen::CreateDescriptorPool() { 542void BlitScreen::CreateDescriptorPool() {
534 const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ 543 const std::array<VkDescriptorPoolSize, 2> pool_sizes{{
535 { 544 {
@@ -571,10 +580,10 @@ void BlitScreen::CreateDescriptorPool() {
571} 580}
572 581
573void BlitScreen::CreateRenderPass() { 582void BlitScreen::CreateRenderPass() {
574 renderpass = CreateRenderPassImpl(swapchain.GetImageViewFormat()); 583 renderpass = CreateRenderPassImpl(image_view_format);
575} 584}
576 585
577vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present) { 586vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) {
578 const VkAttachmentDescription color_attachment{ 587 const VkAttachmentDescription color_attachment{
579 .flags = 0, 588 .flags = 0,
580 .format = format, 589 .format = format,
@@ -584,7 +593,7 @@ vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format, bool is_present
584 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, 593 .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
585 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, 594 .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
586 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 595 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
587 .finalLayout = is_present ? VK_IMAGE_LAYOUT_PRESENT_SRC_KHR : VK_IMAGE_LAYOUT_GENERAL, 596 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
588 }; 597 };
589 598
590 const VkAttachmentReference color_attachment_ref{ 599 const VkAttachmentReference color_attachment_ref{
@@ -1052,16 +1061,6 @@ void BlitScreen::CreateSampler() {
1052 nn_sampler = device.GetLogical().CreateSampler(ci_nn); 1061 nn_sampler = device.GetLogical().CreateSampler(ci_nn);
1053} 1062}
1054 1063
1055void BlitScreen::CreateFramebuffers() {
1056 const VkExtent2D size{swapchain.GetSize()};
1057 framebuffers.resize(image_count);
1058
1059 for (std::size_t i = 0; i < image_count; ++i) {
1060 const VkImageView image_view{swapchain.GetImageViewIndex(i)};
1061 framebuffers[i] = CreateFramebuffer(image_view, size, renderpass);
1062 }
1063}
1064
1065void BlitScreen::ReleaseRawImages() { 1064void BlitScreen::ReleaseRawImages() {
1066 for (const u64 tick : resource_ticks) { 1065 for (const u64 tick : resource_ticks) {
1067 scheduler.Wait(tick); 1066 scheduler.Wait(tick);
@@ -1175,7 +1174,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
1175 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); 1174 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
1176 return; 1175 return;
1177 } 1176 }
1178 aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer), false); 1177 aa_renderpass = CreateRenderPassImpl(GetFormat(framebuffer));
1179 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); 1178 aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass);
1180 1179
1181 const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ 1180 const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{
@@ -1319,8 +1318,7 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
1319 aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); 1318 aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci);
1320} 1319}
1321 1320
1322void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, 1321void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const {
1323 bool nn) const {
1324 const VkDescriptorImageInfo image_info{ 1322 const VkDescriptorImageInfo image_info{
1325 .sampler = nn ? *nn_sampler : *sampler, 1323 .sampler = nn ? *nn_sampler : *sampler,
1326 .imageView = image_view, 1324 .imageView = image_view,
@@ -1356,8 +1354,7 @@ void BlitScreen::UpdateAADescriptorSet(std::size_t image_index, VkImageView imag
1356 device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); 1354 device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {});
1357} 1355}
1358 1356
1359void BlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, 1357void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const {
1360 bool nn) const {
1361 const VkDescriptorBufferInfo buffer_info{ 1358 const VkDescriptorBufferInfo buffer_info{
1362 .buffer = *buffer, 1359 .buffer = *buffer,
1363 .offset = offsetof(BufferData, uniform), 1360 .offset = offsetof(BufferData, uniform),
@@ -1480,8 +1477,7 @@ u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer)
1480 return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; 1477 return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count;
1481} 1478}
1482 1479
1483u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, 1480u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const {
1484 std::size_t image_index) const {
1485 constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); 1481 constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData));
1486 return first_image_offset + GetSizeInBytes(framebuffer) * image_index; 1482 return first_image_offset + GetSizeInBytes(framebuffer) * image_index;
1487} 1483}
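With presentation decoupled from the swapchain, BlitScreen now cycles its own image_index over image_count staging slots, and GetRawImageOffset derives the slot from that member instead of a parameter. A sketch of the offset math, with hypothetical stand-ins for BufferData and the per-frame size helper:

    #include <cstddef>
    #include <cstdint>

    // Illustrative stand-ins: BufferData mirrors the uniform/vertex header,
    // GetSizeInBytes the raw per-frame image size from vk_blit_screen.cpp.
    struct BufferData {
        float uniform[16];
        float vertices[16];
    };

    constexpr std::uint64_t GetSizeInBytes() {
        return 1280ull * 720ull * 4ull;  // stride * height * bytes per pixel
    }

    // The staging buffer holds image_count raw frames back to back; the slot
    // is picked by BlitScreen's internal ring index, not the swapchain index.
    constexpr std::uint64_t GetRawImageOffset(std::size_t image_index) {
        constexpr auto first_image_offset = static_cast<std::uint64_t>(sizeof(BufferData));
        return first_image_offset + GetSizeInBytes() * image_index;
    }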
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index ebe10b08b..68ec20253 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,6 +5,7 @@
5 5
6#include <memory> 6#include <memory>
7 7
8#include "core/frontend/framebuffer_layout.h"
8#include "video_core/vulkan_common/vulkan_memory_allocator.h" 9#include "video_core/vulkan_common/vulkan_memory_allocator.h"
9#include "video_core/vulkan_common/vulkan_wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
10 11
@@ -42,6 +43,9 @@ class RasterizerVulkan;
42class Scheduler; 43class Scheduler;
43class SMAA; 44class SMAA;
44class Swapchain; 45class Swapchain;
46class PresentManager;
47
48struct Frame;
45 49
46struct ScreenInfo { 50struct ScreenInfo {
47 VkImage image{}; 51 VkImage image{};
@@ -55,18 +59,17 @@ class BlitScreen {
55public: 59public:
56 explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window, 60 explicit BlitScreen(Core::Memory::Memory& cpu_memory, Core::Frontend::EmuWindow& render_window,
57 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain, 61 const Device& device, MemoryAllocator& memory_manager, Swapchain& swapchain,
58 Scheduler& scheduler, const ScreenInfo& screen_info); 62 PresentManager& present_manager, Scheduler& scheduler,
63 const ScreenInfo& screen_info);
59 ~BlitScreen(); 64 ~BlitScreen();
60 65
61 void Recreate(); 66 void Recreate();
62 67
63 [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, 68 void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer,
64 const VkFramebuffer& host_framebuffer, 69 const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated);
65 const Layout::FramebufferLayout layout, VkExtent2D render_area,
66 bool use_accelerated);
67 70
68 [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer, 71 void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer,
69 bool use_accelerated); 72 bool use_accelerated, bool is_srgb);
70 73
71 [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, 74 [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
72 VkExtent2D extent); 75 VkExtent2D extent);
@@ -79,10 +82,9 @@ private:
79 82
80 void CreateStaticResources(); 83 void CreateStaticResources();
81 void CreateShaders(); 84 void CreateShaders();
82 void CreateSemaphores();
83 void CreateDescriptorPool(); 85 void CreateDescriptorPool();
84 void CreateRenderPass(); 86 void CreateRenderPass();
85 vk::RenderPass CreateRenderPassImpl(VkFormat, bool is_present = true); 87 vk::RenderPass CreateRenderPassImpl(VkFormat format);
86 void CreateDescriptorSetLayout(); 88 void CreateDescriptorSetLayout();
87 void CreateDescriptorSets(); 89 void CreateDescriptorSets();
88 void CreatePipelineLayout(); 90 void CreatePipelineLayout();
@@ -90,15 +92,14 @@ private:
90 void CreateSampler(); 92 void CreateSampler();
91 93
92 void CreateDynamicResources(); 94 void CreateDynamicResources();
93 void CreateFramebuffers();
94 95
95 void RefreshResources(const Tegra::FramebufferConfig& framebuffer); 96 void RefreshResources(const Tegra::FramebufferConfig& framebuffer);
96 void ReleaseRawImages(); 97 void ReleaseRawImages();
97 void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); 98 void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
98 void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); 99 void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
99 100
100 void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; 101 void UpdateDescriptorSet(VkImageView image_view, bool nn) const;
101 void UpdateAADescriptorSet(std::size_t image_index, VkImageView image_view, bool nn) const; 102 void UpdateAADescriptorSet(VkImageView image_view, bool nn) const;
102 void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; 103 void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
103 void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, 104 void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
104 const Layout::FramebufferLayout layout) const; 105 const Layout::FramebufferLayout layout) const;
@@ -107,16 +108,17 @@ private:
107 void CreateFSR(); 108 void CreateFSR();
108 109
109 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; 110 u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
110 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, 111 u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const;
111 std::size_t image_index) const;
112 112
113 Core::Memory::Memory& cpu_memory; 113 Core::Memory::Memory& cpu_memory;
114 Core::Frontend::EmuWindow& render_window; 114 Core::Frontend::EmuWindow& render_window;
115 const Device& device; 115 const Device& device;
116 MemoryAllocator& memory_allocator; 116 MemoryAllocator& memory_allocator;
117 Swapchain& swapchain; 117 Swapchain& swapchain;
118 PresentManager& present_manager;
118 Scheduler& scheduler; 119 Scheduler& scheduler;
119 std::size_t image_count; 120 std::size_t image_count;
121 std::size_t image_index{};
120 const ScreenInfo& screen_info; 122 const ScreenInfo& screen_info;
121 123
122 vk::ShaderModule vertex_shader; 124 vk::ShaderModule vertex_shader;
@@ -135,7 +137,6 @@ private:
135 vk::Pipeline gaussian_pipeline; 137 vk::Pipeline gaussian_pipeline;
136 vk::Pipeline scaleforce_pipeline; 138 vk::Pipeline scaleforce_pipeline;
137 vk::RenderPass renderpass; 139 vk::RenderPass renderpass;
138 std::vector<vk::Framebuffer> framebuffers;
139 vk::DescriptorSets descriptor_sets; 140 vk::DescriptorSets descriptor_sets;
140 vk::Sampler nn_sampler; 141 vk::Sampler nn_sampler;
141 vk::Sampler sampler; 142 vk::Sampler sampler;
@@ -145,7 +146,6 @@ private:
145 146
146 std::vector<u64> resource_ticks; 147 std::vector<u64> resource_ticks;
147 148
148 std::vector<vk::Semaphore> semaphores;
149 std::vector<vk::Image> raw_images; 149 std::vector<vk::Image> raw_images;
150 std::vector<vk::ImageView> raw_image_views; 150 std::vector<vk::ImageView> raw_image_views;
151 std::vector<MemoryCommit> raw_buffer_commits; 151 std::vector<MemoryCommit> raw_buffer_commits;
@@ -164,6 +164,8 @@ private:
164 u32 raw_width = 0; 164 u32 raw_width = 0;
165 u32 raw_height = 0; 165 u32 raw_height = 0;
166 Service::android::PixelFormat pixel_format{}; 166 Service::android::PixelFormat pixel_format{};
167 bool current_srgb;
168 VkFormat image_view_format;
167 169
168 std::unique_ptr<FSR> fsr; 170 std::unique_ptr<FSR> fsr;
169 std::unique_ptr<SMAA> smaa; 171 std::unique_ptr<SMAA> smaa;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 9cbcb3c8f..510602e8e 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -314,8 +314,12 @@ StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) {
314 return staging_pool.Request(size, MemoryUsage::Upload); 314 return staging_pool.Request(size, MemoryUsage::Upload);
315} 315}
316 316
317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { 317StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) {
318 return staging_pool.Request(size, MemoryUsage::Download); 318 return staging_pool.Request(size, MemoryUsage::Download, deferred);
319}
320
321void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
322 staging_pool.FreeDeferred(ref);
319} 323}
320 324
321u64 BufferCacheRuntime::GetDeviceLocalMemory() const { 325u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
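The deferred flag lets a download staging buffer outlive the request that created it: the buffer cache keeps the allocation until the asynchronous download completes, then hands it back through FreeDeferredStagingBuffer. A hedged sketch of that lifetime scheme, assuming a monotonically increasing GPU tick decides reusability (this is not the real StagingBufferPool):

    #include <cstddef>
    #include <cstdint>
    #include <deque>
    #include <limits>

    struct Allocation {
        std::size_t size = 0;
        std::uint64_t free_tick = 0;  // reusable once the GPU passes this tick
    };

    class DeferredPool {
    public:
        Allocation& Request(std::size_t size, bool deferred, std::uint64_t current_tick) {
            // Deferred requests stay alive until FreeDeferred is called.
            const std::uint64_t tick =
                deferred ? std::numeric_limits<std::uint64_t>::max() : current_tick + 1;
            return allocations.emplace_back(Allocation{size, tick});
        }

        // Called once the async download has been copied back to guest memory.
        void FreeDeferred(Allocation& alloc, std::uint64_t current_tick) {
            alloc.free_tick = current_tick + 1;
        }

    private:
        std::deque<Allocation> allocations;  // deque keeps references stable on growth
    };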
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 183b33632..879f1ed94 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -3,7 +3,8 @@
3 3
4#pragma once 4#pragma once
5 5
6#include "video_core/buffer_cache/buffer_cache.h" 6#include "video_core/buffer_cache/buffer_cache_base.h"
7#include "video_core/buffer_cache/memory_tracker_base.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_vulkan/vk_compute_pass.h" 9#include "video_core/renderer_vulkan/vk_compute_pass.h"
9#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 10#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -75,7 +76,9 @@ public:
75 76
76 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); 77 [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size);
77 78
78 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); 79 [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false);
80
81 void FreeDeferredStagingBuffer(StagingBufferRef& ref);
79 82
80 void PreCopyBarrier(); 83 void PreCopyBarrier();
81 84
@@ -142,6 +145,8 @@ private:
142struct BufferCacheParams { 145struct BufferCacheParams {
143 using Runtime = Vulkan::BufferCacheRuntime; 146 using Runtime = Vulkan::BufferCacheRuntime;
144 using Buffer = Vulkan::Buffer; 147 using Buffer = Vulkan::Buffer;
148 using Async_Buffer = Vulkan::StagingBufferRef;
149 using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
145 150
146 static constexpr bool IS_OPENGL = false; 151 static constexpr bool IS_OPENGL = false;
147 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; 152 static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false;
@@ -150,6 +155,7 @@ struct BufferCacheParams {
150 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 155 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
151 static constexpr bool USE_MEMORY_MAPS = true; 156 static constexpr bool USE_MEMORY_MAPS = true;
152 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; 157 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
158 static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
153}; 159};
154 160
155using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 161using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
new file mode 100644
index 000000000..f9e271507
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache_base.cpp
@@ -0,0 +1,9 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/buffer_cache/buffer_cache.h"
5#include "video_core/renderer_vulkan/vk_buffer_cache.h"
6
7namespace VideoCommon {
8template class VideoCommon::BufferCache<Vulkan::BufferCacheParams>;
9}
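This new translation unit pays the instantiation cost of the BufferCache template exactly once; other files include only buffer_cache_base.h and memory_tracker_base.h. A one-file sketch of the explicit-instantiation pattern, with illustrative names rather than the real yuzu headers:

    #include <cstdio>

    template <class Params>
    class Cache {
    public:
        void Flush();  // declared in the cheap "base" header
    };

    // Definition that would normally live in the heavy header.
    template <class Params>
    void Cache<Params>::Flush() {
        std::puts("flushed");
    }

    struct VulkanParams {};

    // Compiled exactly once, here; other TUs only see the declaration.
    template class Cache<VulkanParams>;

    int main() {
        Cache<VulkanParams>{}.Flush();
    }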
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 985cc3203..a318d643e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -696,6 +696,13 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
696std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( 696std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
697 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, 697 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
698 PipelineStatistics* statistics, bool build_in_parallel) try { 698 PipelineStatistics* statistics, bool build_in_parallel) try {
699 // TODO: Remove this when Intel fixes their shader compiler.
700 // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
701 if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
702 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
703 return nullptr;
704 }
705
699 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); 706 LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash());
700 707
701 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; 708 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
new file mode 100644
index 000000000..c49583013
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -0,0 +1,457 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/microprofile.h"
5#include "common/settings.h"
6#include "common/thread.h"
7#include "video_core/renderer_vulkan/vk_present_manager.h"
8#include "video_core/renderer_vulkan/vk_scheduler.h"
9#include "video_core/renderer_vulkan/vk_swapchain.h"
10#include "video_core/vulkan_common/vulkan_device.h"
11
12namespace Vulkan {
13
14MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
15MICROPROFILE_DEFINE(Vulkan_CopyToSwapchain, "Vulkan", "Copy to swapchain", MP_RGB(192, 255, 192));
16
17namespace {
18
19bool CanBlitToSwapchain(const vk::PhysicalDevice& physical_device, VkFormat format) {
20 const VkFormatProperties props{physical_device.GetFormatProperties(format)};
21 return (props.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT);
22}
23
24[[nodiscard]] VkImageSubresourceLayers MakeImageSubresourceLayers() {
25 return VkImageSubresourceLayers{
26 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
27 .mipLevel = 0,
28 .baseArrayLayer = 0,
29 .layerCount = 1,
30 };
31}
32
33[[nodiscard]] VkImageBlit MakeImageBlit(s32 frame_width, s32 frame_height, s32 swapchain_width,
34 s32 swapchain_height) {
35 return VkImageBlit{
36 .srcSubresource = MakeImageSubresourceLayers(),
37 .srcOffsets =
38 {
39 {
40 .x = 0,
41 .y = 0,
42 .z = 0,
43 },
44 {
45 .x = frame_width,
46 .y = frame_height,
47 .z = 1,
48 },
49 },
50 .dstSubresource = MakeImageSubresourceLayers(),
51 .dstOffsets =
52 {
53 {
54 .x = 0,
55 .y = 0,
56 .z = 0,
57 },
58 {
59 .x = swapchain_width,
60 .y = swapchain_height,
61 .z = 1,
62 },
63 },
64 };
65}
66
67[[nodiscard]] VkImageCopy MakeImageCopy(u32 frame_width, u32 frame_height, u32 swapchain_width,
68 u32 swapchain_height) {
69 return VkImageCopy{
70 .srcSubresource = MakeImageSubresourceLayers(),
71 .srcOffset =
72 {
73 .x = 0,
74 .y = 0,
75 .z = 0,
76 },
77 .dstSubresource = MakeImageSubresourceLayers(),
78 .dstOffset =
79 {
80 .x = 0,
81 .y = 0,
82 .z = 0,
83 },
84 .extent =
85 {
86 .width = std::min(frame_width, swapchain_width),
87 .height = std::min(frame_height, swapchain_height),
88 .depth = 1,
89 },
90 };
91}
92
93} // Anonymous namespace
94
95PresentManager::PresentManager(Core::Frontend::EmuWindow& render_window_, const Device& device_,
96 MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
97 Swapchain& swapchain_)
98 : render_window{render_window_}, device{device_},
99 memory_allocator{memory_allocator_}, scheduler{scheduler_}, swapchain{swapchain_},
100 blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())},
101 use_present_thread{Settings::values.async_presentation.GetValue()},
102 image_count{swapchain.GetImageCount()} {
103
104 auto& dld = device.GetLogical();
105 cmdpool = dld.CreateCommandPool({
106 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
107 .pNext = nullptr,
108 .flags =
109 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
110 .queueFamilyIndex = device.GetGraphicsFamily(),
111 });
112 auto cmdbuffers = cmdpool.Allocate(image_count);
113
114 frames.resize(image_count);
115 for (u32 i = 0; i < frames.size(); i++) {
116 Frame& frame = frames[i];
117 frame.cmdbuf = vk::CommandBuffer{cmdbuffers[i], device.GetDispatchLoader()};
118 frame.render_ready = dld.CreateSemaphore({
119 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
120 .pNext = nullptr,
121 .flags = 0,
122 });
123 frame.present_done = dld.CreateFence({
124 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
125 .pNext = nullptr,
126 .flags = VK_FENCE_CREATE_SIGNALED_BIT,
127 });
128 free_queue.push(&frame);
129 }
130
131 if (use_present_thread) {
132 present_thread = std::jthread([this](std::stop_token token) { PresentThread(token); });
133 }
134}
135
136PresentManager::~PresentManager() = default;
137
138Frame* PresentManager::GetRenderFrame() {
139 MICROPROFILE_SCOPE(Vulkan_WaitPresent);
140
141 // Wait for free presentation frames
142 std::unique_lock lock{free_mutex};
143 free_cv.wait(lock, [this] { return !free_queue.empty(); });
144
145 // Take the frame from the queue
146 Frame* frame = free_queue.front();
147 free_queue.pop();
148
149 // Wait for the presentation to be finished so all frame resources are free
150 frame->present_done.Wait();
151 frame->present_done.Reset();
152
153 return frame;
154}
155
156void PresentManager::Present(Frame* frame) {
157 if (!use_present_thread) {
158 scheduler.WaitWorker();
159 CopyToSwapchain(frame);
160 free_queue.push(frame);
161 return;
162 }
163
164 scheduler.Record([this, frame](vk::CommandBuffer) {
165 std::unique_lock lock{queue_mutex};
166 present_queue.push(frame);
167 frame_cv.notify_one();
168 });
169}
170
171void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
172 VkFormat image_view_format, VkRenderPass rd) {
173 auto& dld = device.GetLogical();
174
175 frame->width = width;
176 frame->height = height;
177 frame->is_srgb = is_srgb;
178
179 frame->image = dld.CreateImage({
180 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
181 .pNext = nullptr,
182 .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
183 .imageType = VK_IMAGE_TYPE_2D,
184 .format = swapchain.GetImageFormat(),
185 .extent =
186 {
187 .width = width,
188 .height = height,
189 .depth = 1,
190 },
191 .mipLevels = 1,
192 .arrayLayers = 1,
193 .samples = VK_SAMPLE_COUNT_1_BIT,
194 .tiling = VK_IMAGE_TILING_OPTIMAL,
195 .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
196 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
197 .queueFamilyIndexCount = 0,
198 .pQueueFamilyIndices = nullptr,
199 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
200 });
201
202 frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
203
204 frame->image_view = dld.CreateImageView({
205 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
206 .pNext = nullptr,
207 .flags = 0,
208 .image = *frame->image,
209 .viewType = VK_IMAGE_VIEW_TYPE_2D,
210 .format = image_view_format,
211 .components =
212 {
213 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
214 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
215 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
216 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
217 },
218 .subresourceRange =
219 {
220 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
221 .baseMipLevel = 0,
222 .levelCount = 1,
223 .baseArrayLayer = 0,
224 .layerCount = 1,
225 },
226 });
227
228 const VkImageView image_view{*frame->image_view};
229 frame->framebuffer = dld.CreateFramebuffer({
230 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
231 .pNext = nullptr,
232 .flags = 0,
233 .renderPass = rd,
234 .attachmentCount = 1,
235 .pAttachments = &image_view,
236 .width = width,
237 .height = height,
238 .layers = 1,
239 });
240}
241
242void PresentManager::WaitPresent() {
243 if (!use_present_thread) {
244 return;
245 }
246
247 // Wait for the present queue to be empty
248 {
249 std::unique_lock queue_lock{queue_mutex};
250 frame_cv.wait(queue_lock, [this] { return present_queue.empty(); });
251 }
252
253 // The above condition will be satisfied when the last frame is taken from the queue.
254    // To ensure that frame has been presented as well, take hold of the swapchain
255    // mutex.
256 std::scoped_lock swapchain_lock{swapchain_mutex};
257}
258
259void PresentManager::PresentThread(std::stop_token token) {
260 Common::SetCurrentThreadName("VulkanPresent");
261 while (!token.stop_requested()) {
262 std::unique_lock lock{queue_mutex};
263
264 // Wait for presentation frames
265 Common::CondvarWait(frame_cv, lock, token, [this] { return !present_queue.empty(); });
266 if (token.stop_requested()) {
267 return;
268 }
269
270 // Take the frame and notify anyone waiting
271 Frame* frame = present_queue.front();
272 present_queue.pop();
273 frame_cv.notify_one();
274
275        // By exchanging the lock ownership we acquire the swapchain lock
276        // before the queue lock goes out of scope. This way the swapchain
277        // lock in WaitPresent is guaranteed to be taken after this point.
278 std::exchange(lock, std::unique_lock{swapchain_mutex});
279
280 CopyToSwapchain(frame);
281
282 // Free the frame for reuse
283 std::scoped_lock fl{free_mutex};
284 free_queue.push(frame);
285 free_cv.notify_one();
286 }
287}
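The std::exchange in PresentThread is a lock-ordering trick: the new unique_lock over swapchain_mutex is constructed (and locked) as the argument, then swapped into `lock`, and the old queue lock is only released when the returned temporary dies at the end of the statement. A self-contained sketch, with illustrative mutex names:

    #include <mutex>
    #include <utility>

    std::mutex queue_mutex;      // A: guards the present queue
    std::mutex swapchain_mutex;  // B: guards actual presentation

    void Handoff() {
        std::unique_lock lock{queue_mutex};
        // ... pop a frame from the queue under A ...
        // B is locked before exchange runs; the old lock over A is returned
        // as a temporary and released at the end of this full expression.
        std::exchange(lock, std::unique_lock{swapchain_mutex});
        // ... present under B; a waiter locking B is ordered after this ...
    }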
288
289void PresentManager::CopyToSwapchain(Frame* frame) {
290 MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain);
291
292 const auto recreate_swapchain = [&] {
293 swapchain.Create(frame->width, frame->height, frame->is_srgb);
294 image_count = swapchain.GetImageCount();
295 };
296
297 // If the size or colorspace of the incoming frames has changed, recreate the swapchain
298 // to account for that.
299 const bool srgb_changed = swapchain.NeedsRecreation(frame->is_srgb);
300 const bool size_changed =
301 swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height;
302 if (srgb_changed || size_changed) {
303 recreate_swapchain();
304 }
305
306 while (swapchain.AcquireNextImage()) {
307 recreate_swapchain();
308 }
309
310 const vk::CommandBuffer cmdbuf{frame->cmdbuf};
311 cmdbuf.Begin({
312 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
313 .pNext = nullptr,
314 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
315 .pInheritanceInfo = nullptr,
316 });
317
318 const VkImage image{swapchain.CurrentImage()};
319 const VkExtent2D extent = swapchain.GetExtent();
320 const std::array pre_barriers{
321 VkImageMemoryBarrier{
322 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
323 .pNext = nullptr,
324 .srcAccessMask = 0,
325 .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
326 .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
327 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
328 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
329 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
330 .image = image,
331 .subresourceRange{
332 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
333 .baseMipLevel = 0,
334 .levelCount = 1,
335 .baseArrayLayer = 0,
336 .layerCount = VK_REMAINING_ARRAY_LAYERS,
337 },
338 },
339 VkImageMemoryBarrier{
340 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
341 .pNext = nullptr,
342 .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
343 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
344 .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
345 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
346 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
347 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
348 .image = *frame->image,
349 .subresourceRange{
350 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
351 .baseMipLevel = 0,
352 .levelCount = 1,
353 .baseArrayLayer = 0,
354 .layerCount = VK_REMAINING_ARRAY_LAYERS,
355 },
356 },
357 };
358 const std::array post_barriers{
359 VkImageMemoryBarrier{
360 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
361 .pNext = nullptr,
362 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
363 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
364 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
365 .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
366 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
367 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
368 .image = image,
369 .subresourceRange{
370 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
371 .baseMipLevel = 0,
372 .levelCount = 1,
373 .baseArrayLayer = 0,
374 .layerCount = VK_REMAINING_ARRAY_LAYERS,
375 },
376 },
377 VkImageMemoryBarrier{
378 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
379 .pNext = nullptr,
380 .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
381 .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
382 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
383 .newLayout = VK_IMAGE_LAYOUT_GENERAL,
384 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
385 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
386 .image = *frame->image,
387 .subresourceRange{
388 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
389 .baseMipLevel = 0,
390 .levelCount = 1,
391 .baseArrayLayer = 0,
392 .layerCount = VK_REMAINING_ARRAY_LAYERS,
393 },
394 },
395 };
396
397 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
398 {}, {}, pre_barriers);
399
400 if (blit_supported) {
401 cmdbuf.BlitImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
402 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
403 MakeImageBlit(frame->width, frame->height, extent.width, extent.height),
404 VK_FILTER_LINEAR);
405 } else {
406 cmdbuf.CopyImage(*frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image,
407 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
408 MakeImageCopy(frame->width, frame->height, extent.width, extent.height));
409 }
410
411 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, {},
412 {}, {}, post_barriers);
413
414 cmdbuf.End();
415
416 const VkSemaphore present_semaphore = swapchain.CurrentPresentSemaphore();
417 const VkSemaphore render_semaphore = swapchain.CurrentRenderSemaphore();
418 const std::array wait_semaphores = {present_semaphore, *frame->render_ready};
419
420 static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
421 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
422 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
423 };
424
425 const VkSubmitInfo submit_info{
426 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
427 .pNext = nullptr,
428 .waitSemaphoreCount = 2U,
429 .pWaitSemaphores = wait_semaphores.data(),
430 .pWaitDstStageMask = wait_stage_masks.data(),
431 .commandBufferCount = 1,
432 .pCommandBuffers = cmdbuf.address(),
433 .signalSemaphoreCount = 1U,
434 .pSignalSemaphores = &render_semaphore,
435 };
436
437 // Submit the image copy/blit to the swapchain
438 {
439 std::scoped_lock lock{scheduler.submit_mutex};
440 switch (const VkResult result =
441 device.GetGraphicsQueue().Submit(submit_info, *frame->present_done)) {
442 case VK_SUCCESS:
443 break;
444 case VK_ERROR_DEVICE_LOST:
445 device.ReportLoss();
446 [[fallthrough]];
447 default:
448 vk::Check(result);
449 break;
450 }
451 }
452
453 // Present
454 swapchain.Present(render_semaphore);
455}
456
457} // namespace Vulkan
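CopyToSwapchain no longer treats an out-of-date swapchain as a special case: AcquireNextImage now reports suboptimal/out-of-date as a bool, so acquisition is just a recreate-and-retry loop. A small sketch of that loop under a fake swapchain that fails its first acquire:

    struct FakeSwapchain {
        int bad_acquires = 1;  // pretend the first acquire reports OUT_OF_DATE
        bool AcquireNextImage() { return bad_acquires-- > 0; }
        void Create(unsigned width, unsigned height, bool srgb) {}
    };

    void AcquireLoop(FakeSwapchain& swapchain, unsigned w, unsigned h, bool srgb) {
        while (swapchain.AcquireNextImage()) {
            swapchain.Create(w, h, srgb);  // recreate, then retry the acquire
        }
    }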
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
new file mode 100644
index 000000000..420a775e2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -0,0 +1,83 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <condition_variable>
7#include <mutex>
8#include <queue>
9
10#include "common/common_types.h"
11#include "common/polyfill_thread.h"
12#include "video_core/vulkan_common/vulkan_memory_allocator.h"
13#include "video_core/vulkan_common/vulkan_wrapper.h"
14
15namespace Core::Frontend {
16class EmuWindow;
17} // namespace Core::Frontend
18
19namespace Vulkan {
20
21class Device;
22class Scheduler;
23class Swapchain;
24
25struct Frame {
26 u32 width;
27 u32 height;
28 bool is_srgb;
29 vk::Image image;
30 vk::ImageView image_view;
31 vk::Framebuffer framebuffer;
32 MemoryCommit image_commit;
33 vk::CommandBuffer cmdbuf;
34 vk::Semaphore render_ready;
35 vk::Fence present_done;
36};
37
38class PresentManager {
39public:
40 PresentManager(Core::Frontend::EmuWindow& render_window, const Device& device,
41 MemoryAllocator& memory_allocator, Scheduler& scheduler, Swapchain& swapchain);
42 ~PresentManager();
43
44    /// Acquires a free presentation frame, waiting until one is available
45 Frame* GetRenderFrame();
46
47 /// Pushes a frame for presentation
48 void Present(Frame* frame);
49
50 /// Recreates the present frame to match the provided parameters
51 void RecreateFrame(Frame* frame, u32 width, u32 height, bool is_srgb,
52 VkFormat image_view_format, VkRenderPass rd);
53
54 /// Waits for the present thread to finish presenting all queued frames.
55 void WaitPresent();
56
57private:
58 void PresentThread(std::stop_token token);
59
60 void CopyToSwapchain(Frame* frame);
61
62private:
63 Core::Frontend::EmuWindow& render_window;
64 const Device& device;
65 MemoryAllocator& memory_allocator;
66 Scheduler& scheduler;
67 Swapchain& swapchain;
68 vk::CommandPool cmdpool;
69 std::vector<Frame> frames;
70 std::queue<Frame*> present_queue;
71 std::queue<Frame*> free_queue;
72 std::condition_variable_any frame_cv;
73 std::condition_variable free_cv;
74 std::mutex swapchain_mutex;
75 std::mutex queue_mutex;
76 std::mutex free_mutex;
77 std::jthread present_thread;
78 bool blit_supported;
79 bool use_present_thread;
80 std::size_t image_count;
81};
82
83} // namespace Vulkan
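present_thread is a std::jthread and frame_cv a condition_variable_any, so the waits inside PresentThread can be interrupted by the thread's stop token. A sketch of that pattern using the standard C++20 stop-token-aware wait, which Common::CondvarWait presumably polyfills on toolchains that lack it:

    #include <condition_variable>
    #include <mutex>
    #include <queue>
    #include <stop_token>
    #include <thread>

    std::mutex m;
    std::condition_variable_any cv;
    std::queue<int> work;

    void Worker(std::stop_token token) {
        while (!token.stop_requested()) {
            std::unique_lock lock{m};
            // Returns false (and wakes) as soon as stop is requested.
            if (!cv.wait(lock, token, [] { return !work.empty(); })) {
                return;
            }
            work.pop();
        }
    }

    int main() {
        std::jthread worker{Worker};  // destructor requests stop and joins
    }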
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 057e16967..80455ec08 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -46,10 +46,11 @@ Scheduler::Scheduler(const Device& device_, StateTracker& state_tracker_)
46 46
47Scheduler::~Scheduler() = default; 47Scheduler::~Scheduler() = default;
48 48
49void Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 49u64 Scheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
50 // When flushing, we only send data to the worker thread; no waiting is necessary. 50 // When flushing, we only send data to the worker thread; no waiting is necessary.
51 SubmitExecution(signal_semaphore, wait_semaphore); 51 const u64 signal_value = SubmitExecution(signal_semaphore, wait_semaphore);
52 AllocateNewContext(); 52 AllocateNewContext();
53 return signal_value;
53} 54}
54 55
55void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 56void Scheduler::Finish(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
@@ -205,7 +206,7 @@ void Scheduler::AllocateWorkerCommandBuffer() {
205 }); 206 });
206} 207}
207 208
208void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { 209u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) {
209 EndPendingOperations(); 210 EndPendingOperations();
210 InvalidateState(); 211 InvalidateState();
211 212
@@ -217,6 +218,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
217 on_submit(); 218 on_submit();
218 } 219 }
219 220
221 std::scoped_lock lock{submit_mutex};
220 switch (const VkResult result = master_semaphore->SubmitQueue( 222 switch (const VkResult result = master_semaphore->SubmitQueue(
221 cmdbuf, signal_semaphore, wait_semaphore, signal_value)) { 223 cmdbuf, signal_semaphore, wait_semaphore, signal_value)) {
222 case VK_SUCCESS: 224 case VK_SUCCESS:
@@ -231,6 +233,7 @@ void Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_s
231 }); 233 });
232 chunk->MarkSubmit(); 234 chunk->MarkSubmit();
233 DispatchWork(); 235 DispatchWork();
236 return signal_value;
234} 237}
235 238
236void Scheduler::AllocateNewContext() { 239void Scheduler::AllocateNewContext() {
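Two related changes here: SubmitExecution now serializes queue access on the new submit_mutex, since the present thread submits to the same graphics queue concurrently, and Flush returns the master-semaphore tick the submission will signal, presumably so callers can wait on exactly that submission. A minimal sketch of the returned-tick idea (MasterSemaphore stands in for yuzu's timeline-semaphore wrapper):

    #include <atomic>
    #include <cstdint>

    struct MasterSemaphore {
        std::atomic<std::uint64_t> signaled{0};
        void Wait(std::uint64_t tick) {
            while (signaled.load() < tick) { /* real code blocks on vkWaitSemaphores */ }
        }
    };

    class Scheduler {
    public:
        std::uint64_t Flush() {
            const std::uint64_t signal_value = ++next_tick;  // tick this submit signals
            // ... submit the command buffer that signals `signal_value` ...
            return signal_value;
        }
    private:
        std::uint64_t next_tick = 0;
    };

    // Usage sketch: const auto tick = scheduler.Flush(); master_semaphore.Wait(tick);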
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 8d75ce987..475c682eb 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -34,7 +34,7 @@ public:
34 ~Scheduler(); 34 ~Scheduler();
35 35
36 /// Sends the current execution context to the GPU. 36 /// Sends the current execution context to the GPU.
37 void Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); 37 u64 Flush(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
38 38
39 /// Sends the current execution context to the GPU and waits for it to complete. 39 /// Sends the current execution context to the GPU and waits for it to complete.
40 void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); 40 void Finish(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr);
@@ -106,6 +106,8 @@ public:
106 return *master_semaphore; 106 return *master_semaphore;
107 } 107 }
108 108
109 std::mutex submit_mutex;
110
109private: 111private:
110 class Command { 112 class Command {
111 public: 113 public:
@@ -201,7 +203,7 @@ private:
201 203
202 void AllocateWorkerCommandBuffer(); 204 void AllocateWorkerCommandBuffer();
203 205
204 void SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore); 206 u64 SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore);
205 207
206 void AllocateNewContext(); 208 void AllocateNewContext();
207 209
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index b1465e35c..23bbea7f1 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -99,18 +99,16 @@ void Swapchain::Create(u32 width_, u32 height_, bool srgb) {
99 return; 99 return;
100 } 100 }
101 101
102 device.GetLogical().WaitIdle();
103 Destroy(); 102 Destroy();
104 103
105 CreateSwapchain(capabilities, srgb); 104 CreateSwapchain(capabilities, srgb);
106 CreateSemaphores(); 105 CreateSemaphores();
107 CreateImageViews();
108 106
109 resource_ticks.clear(); 107 resource_ticks.clear();
110 resource_ticks.resize(image_count); 108 resource_ticks.resize(image_count);
111} 109}
112 110
113void Swapchain::AcquireNextImage() { 111bool Swapchain::AcquireNextImage() {
114 const VkResult result = device.GetLogical().AcquireNextImageKHR( 112 const VkResult result = device.GetLogical().AcquireNextImageKHR(
115 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], 113 *swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index],
116 VK_NULL_HANDLE, &image_index); 114 VK_NULL_HANDLE, &image_index);
@@ -127,8 +125,11 @@ void Swapchain::AcquireNextImage() {
127 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result)); 125 LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned {}", vk::ToString(result));
128 break; 126 break;
129 } 127 }
128
130 scheduler.Wait(resource_ticks[image_index]); 129 scheduler.Wait(resource_ticks[image_index]);
131 resource_ticks[image_index] = scheduler.CurrentTick(); 130 resource_ticks[image_index] = scheduler.CurrentTick();
131
132 return is_suboptimal || is_outdated;
132} 133}
133 134
134void Swapchain::Present(VkSemaphore render_semaphore) { 135void Swapchain::Present(VkSemaphore render_semaphore) {
@@ -143,6 +144,7 @@ void Swapchain::Present(VkSemaphore render_semaphore) {
143 .pImageIndices = &image_index, 144 .pImageIndices = &image_index,
144 .pResults = nullptr, 145 .pResults = nullptr,
145 }; 146 };
147 std::scoped_lock lock{scheduler.submit_mutex};
146 switch (const VkResult result = present_queue.Present(present_info)) { 148 switch (const VkResult result = present_queue.Present(present_info)) {
147 case VK_SUCCESS: 149 case VK_SUCCESS:
148 break; 150 break;
@@ -168,7 +170,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
168 const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; 170 const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
169 171
170 const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)}; 172 const VkCompositeAlphaFlagBitsKHR alpha_flags{ChooseAlphaFlags(capabilities)};
171 const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; 173 surface_format = ChooseSwapSurfaceFormat(formats);
172 present_mode = ChooseSwapPresentMode(present_modes); 174 present_mode = ChooseSwapPresentMode(present_modes);
173 175
174 u32 requested_image_count{capabilities.minImageCount + 1}; 176 u32 requested_image_count{capabilities.minImageCount + 1};
@@ -193,7 +195,7 @@ void Swapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bo
193 .imageColorSpace = surface_format.colorSpace, 195 .imageColorSpace = surface_format.colorSpace,
194 .imageExtent = {}, 196 .imageExtent = {},
195 .imageArrayLayers = 1, 197 .imageArrayLayers = 1,
196 .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, 198 .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
197 .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, 199 .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
198 .queueFamilyIndexCount = 0, 200 .queueFamilyIndexCount = 0,
199 .pQueueFamilyIndices = nullptr, 201 .pQueueFamilyIndices = nullptr,
@@ -241,45 +243,14 @@ void Swapchain::CreateSemaphores() {
241 present_semaphores.resize(image_count); 243 present_semaphores.resize(image_count);
242 std::ranges::generate(present_semaphores, 244 std::ranges::generate(present_semaphores,
243 [this] { return device.GetLogical().CreateSemaphore(); }); 245 [this] { return device.GetLogical().CreateSemaphore(); });
244} 246 render_semaphores.resize(image_count);
245 247 std::ranges::generate(render_semaphores,
246void Swapchain::CreateImageViews() { 248 [this] { return device.GetLogical().CreateSemaphore(); });
247 VkImageViewCreateInfo ci{
248 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
249 .pNext = nullptr,
250 .flags = 0,
251 .image = {},
252 .viewType = VK_IMAGE_VIEW_TYPE_2D,
253 .format = image_view_format,
254 .components =
255 {
256 .r = VK_COMPONENT_SWIZZLE_IDENTITY,
257 .g = VK_COMPONENT_SWIZZLE_IDENTITY,
258 .b = VK_COMPONENT_SWIZZLE_IDENTITY,
259 .a = VK_COMPONENT_SWIZZLE_IDENTITY,
260 },
261 .subresourceRange =
262 {
263 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
264 .baseMipLevel = 0,
265 .levelCount = 1,
266 .baseArrayLayer = 0,
267 .layerCount = 1,
268 },
269 };
270
271 image_views.resize(image_count);
272 for (std::size_t i = 0; i < image_count; i++) {
273 ci.image = images[i];
274 image_views[i] = device.GetLogical().CreateImageView(ci);
275 }
276} 249}
277 250
278void Swapchain::Destroy() { 251void Swapchain::Destroy() {
279 frame_index = 0; 252 frame_index = 0;
280 present_semaphores.clear(); 253 present_semaphores.clear();
281 framebuffers.clear();
282 image_views.clear();
283 swapchain.reset(); 254 swapchain.reset();
284} 255}
285 256
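
As the hunks above show, AcquireNextImage now returns is_suboptimal || is_outdated so the caller can recreate the swapchain before presenting, and Present takes scheduler.submit_mutex so presentation cannot interleave with a queue submission. A self-contained sketch of the caller-side recreate pattern (this Swapchain is a stub written for illustration, not yuzu's class):

#include <cstdio>

// Stub swapchain: AcquireNextImage() reports "recreate me" instead of void.
struct Swapchain {
    bool outdated = true;  // pretend the first acquire finds a stale swapchain

    void Create() {
        outdated = false;
        std::puts("swapchain recreated");
    }

    // Returns true when the swapchain is suboptimal or out of date.
    bool AcquireNextImage() { return outdated; }
};

int main() {
    Swapchain swapchain;
    if (swapchain.AcquireNextImage()) {  // true -> must recreate before presenting
        swapchain.Create();
        swapchain.AcquireNextImage();    // acquire again from the fresh swapchain
    }
}
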
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index caf1ff32b..419742586 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -27,7 +27,7 @@ public:
27 void Create(u32 width, u32 height, bool srgb); 27 void Create(u32 width, u32 height, bool srgb);
28 28
29 /// Acquires the next image in the swapchain, waits as needed. 29 /// Acquires the next image in the swapchain, waits as needed.
30 void AcquireNextImage(); 30 bool AcquireNextImage();
31 31
32 /// Presents the rendered image to the swapchain. 32 /// Presents the rendered image to the swapchain.
33 void Present(VkSemaphore render_semaphore); 33 void Present(VkSemaphore render_semaphore);
@@ -52,6 +52,11 @@ public:
52 return is_suboptimal; 52 return is_suboptimal;
53 } 53 }
54 54
55 /// Returns true when the swapchain format is in the srgb color space
56 bool IsSrgb() const {
57 return current_srgb;
58 }
59
55 VkExtent2D GetSize() const { 60 VkExtent2D GetSize() const {
56 return extent; 61 return extent;
57 } 62 }
@@ -64,22 +69,34 @@ public:
64 return image_index; 69 return image_index;
65 } 70 }
66 71
72 std::size_t GetFrameIndex() const {
73 return frame_index;
74 }
75
67 VkImage GetImageIndex(std::size_t index) const { 76 VkImage GetImageIndex(std::size_t index) const {
68 return images[index]; 77 return images[index];
69 } 78 }
70 79
71 VkImageView GetImageViewIndex(std::size_t index) const { 80 VkImage CurrentImage() const {
72 return *image_views[index]; 81 return images[image_index];
73 } 82 }
74 83
75 VkFormat GetImageViewFormat() const { 84 VkFormat GetImageViewFormat() const {
76 return image_view_format; 85 return image_view_format;
77 } 86 }
78 87
88 VkFormat GetImageFormat() const {
89 return surface_format.format;
90 }
91
79 VkSemaphore CurrentPresentSemaphore() const { 92 VkSemaphore CurrentPresentSemaphore() const {
80 return *present_semaphores[frame_index]; 93 return *present_semaphores[frame_index];
81 } 94 }
82 95
96 VkSemaphore CurrentRenderSemaphore() const {
97 return *render_semaphores[frame_index];
98 }
99
83 u32 GetWidth() const { 100 u32 GetWidth() const {
84 return width; 101 return width;
85 } 102 }
@@ -88,6 +105,10 @@ public:
88 return height; 105 return height;
89 } 106 }
90 107
108 VkExtent2D GetExtent() const {
109 return extent;
110 }
111
91private: 112private:
92 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb); 113 void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, bool srgb);
93 void CreateSemaphores(); 114 void CreateSemaphores();
@@ -107,10 +128,9 @@ private:
107 128
108 std::size_t image_count{}; 129 std::size_t image_count{};
109 std::vector<VkImage> images; 130 std::vector<VkImage> images;
110 std::vector<vk::ImageView> image_views;
111 std::vector<vk::Framebuffer> framebuffers;
112 std::vector<u64> resource_ticks; 131 std::vector<u64> resource_ticks;
113 std::vector<vk::Semaphore> present_semaphores; 132 std::vector<vk::Semaphore> present_semaphores;
133 std::vector<vk::Semaphore> render_semaphores;
114 134
115 u32 width; 135 u32 width;
116 u32 height; 136 u32 height;
@@ -121,6 +141,7 @@ private:
121 VkFormat image_view_format{}; 141 VkFormat image_view_format{};
122 VkExtent2D extent{}; 142 VkExtent2D extent{};
123 VkPresentModeKHR present_mode{}; 143 VkPresentModeKHR present_mode{};
144 VkSurfaceFormatKHR surface_format{};
124 145
125 bool current_srgb{}; 146 bool current_srgb{};
126 bool current_fps_unlocked{}; 147 bool current_fps_unlocked{};
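
The header additions keep two semaphores per frame slot: the present semaphore (signaled by image acquisition, waited on by the render submit) and the new render semaphore (signaled by the render submit, waited on by the present). A tiny stand-in sketch of that pairing, mirroring the accessors above (the types here are illustrative, not vk::Semaphore):

#include <cstddef>
#include <vector>

struct Semaphore { int id; };  // illustrative stand-in for vk::Semaphore

struct FrameSemaphores {
    explicit FrameSemaphores(std::size_t image_count)
        : present(image_count), render(image_count) {}

    // Signaled by vkAcquireNextImageKHR, waited on by the render submit.
    Semaphore CurrentPresentSemaphore(std::size_t frame_index) const {
        return present[frame_index];
    }
    // Signaled by the render submit, waited on by vkQueuePresentKHR.
    Semaphore CurrentRenderSemaphore(std::size_t frame_index) const {
        return render[frame_index];
    }

    std::vector<Semaphore> present;
    std::vector<Semaphore> render;
};

int main() {
    FrameSemaphores sync{3};  // e.g. a triple-buffered swapchain
    (void)sync.CurrentPresentSemaphore(0);
    (void)sync.CurrentRenderSemaphore(0);
}
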
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 009dab0b6..0630ebda5 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -14,13 +14,18 @@ namespace Vulkan {
14 14
15UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_) 15UpdateDescriptorQueue::UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_)
16 : device{device_}, scheduler{scheduler_} { 16 : device{device_}, scheduler{scheduler_} {
17 payload_start = payload.data();
17 payload_cursor = payload.data(); 18 payload_cursor = payload.data();
18} 19}
19 20
20UpdateDescriptorQueue::~UpdateDescriptorQueue() = default; 21UpdateDescriptorQueue::~UpdateDescriptorQueue() = default;
21 22
22void UpdateDescriptorQueue::TickFrame() { 23void UpdateDescriptorQueue::TickFrame() {
23 payload_cursor = payload.data(); 24 if (++frame_index >= FRAMES_IN_FLIGHT) {
25 frame_index = 0;
26 }
27 payload_start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
28 payload_cursor = payload_start;
24} 29}
25 30
26void UpdateDescriptorQueue::Acquire() { 31void UpdateDescriptorQueue::Acquire() {
@@ -28,10 +33,10 @@ void UpdateDescriptorQueue::Acquire() {
28 // This is the maximum number of entries a single draw call might use. 33 // This is the maximum number of entries a single draw call might use.
29 static constexpr size_t MIN_ENTRIES = 0x400; 34 static constexpr size_t MIN_ENTRIES = 0x400;
30 35
31 if (std::distance(payload.data(), payload_cursor) + MIN_ENTRIES >= payload.max_size()) { 36 if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
32 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); 37 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
33 scheduler.WaitWorker(); 38 scheduler.WaitWorker();
34 payload_cursor = payload.data(); 39 payload_cursor = payload_start;
35 } 40 }
36 upload_start = payload_cursor; 41 upload_start = payload_cursor;
37} 42}
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 625bcc809..1c1a7020b 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -29,6 +29,12 @@ struct DescriptorUpdateEntry {
29}; 29};
30 30
31class UpdateDescriptorQueue final { 31class UpdateDescriptorQueue final {
32 // This should be plenty for the vast majority of cases. Most desktop platforms only
33 // provide up to 3 swapchain images.
34 static constexpr size_t FRAMES_IN_FLIGHT = 5;
35 static constexpr size_t FRAME_PAYLOAD_SIZE = 0x10000;
36 static constexpr size_t PAYLOAD_SIZE = FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT;
37
32public: 38public:
33 explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_); 39 explicit UpdateDescriptorQueue(const Device& device_, Scheduler& scheduler_);
34 ~UpdateDescriptorQueue(); 40 ~UpdateDescriptorQueue();
@@ -73,9 +79,11 @@ private:
73 const Device& device; 79 const Device& device;
74 Scheduler& scheduler; 80 Scheduler& scheduler;
75 81
82 size_t frame_index{0};
76 DescriptorUpdateEntry* payload_cursor = nullptr; 83 DescriptorUpdateEntry* payload_cursor = nullptr;
84 DescriptorUpdateEntry* payload_start = nullptr;
77 const DescriptorUpdateEntry* upload_start = nullptr; 85 const DescriptorUpdateEntry* upload_start = nullptr;
78 std::array<DescriptorUpdateEntry, 0x10000> payload; 86 std::array<DescriptorUpdateEntry, PAYLOAD_SIZE> payload;
79}; 87};
80 88
81} // namespace Vulkan 89} // namespace Vulkan
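
The payload array above is now a ring of FRAMES_IN_FLIGHT equal slices: TickFrame advances payload_start to the next slice, so descriptor entries written for a previous frame are not overwritten while they may still be consumed, and Acquire only falls back to WaitWorker when a single frame's slice overflows. A self-contained sketch of the slice arithmetic (PayloadRing and Entry are stand-ins; a vector replaces the member std::array only to keep the example heap-allocated):

#include <cstddef>
#include <vector>

struct Entry { unsigned raw; };  // stand-in for DescriptorUpdateEntry

constexpr std::size_t FRAMES_IN_FLIGHT = 5;          // values from the diff above
constexpr std::size_t FRAME_PAYLOAD_SIZE = 0x10000;

class PayloadRing {
public:
    PayloadRing()
        : payload(FRAME_PAYLOAD_SIZE * FRAMES_IN_FLIGHT),
          start{payload.data()}, cursor{payload.data()} {}

    // Advance to the next frame's slice; earlier slices stay intact while
    // they may still be read.
    void TickFrame() {
        if (++frame_index >= FRAMES_IN_FLIGHT) {
            frame_index = 0;
        }
        start = payload.data() + frame_index * FRAME_PAYLOAD_SIZE;
        cursor = start;
    }

    // True when the current slice cannot fit another worst-case draw,
    // mirroring the overflow check in Acquire().
    bool NearlyFull(std::size_t min_entries) const {
        return static_cast<std::size_t>(cursor - start) + min_entries >= FRAME_PAYLOAD_SIZE;
    }

private:
    std::vector<Entry> payload;
    std::size_t frame_index = 0;
    Entry* start;
    Entry* cursor;
};

int main() {
    PayloadRing ring;
    ring.TickFrame();  // now writing into slice 1; slice 0 is left untouched
}
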
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index e601f8446..f335009d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -888,7 +888,7 @@ void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* i
888 buffer, 888 buffer,
889 download_map.buffer, 889 download_map.buffer,
890 }; 890 };
891 std::array buffer_offsets{ 891 std::array<u64, 2> buffer_offsets{
892 buffer_offset, 892 buffer_offset,
893 download_map.offset, 893 download_map.offset,
894 }; 894 };
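
The texture-cache hunk spells out std::array<u64, 2> instead of relying on class template argument deduction; CTAD over a braced list is ill-formed when the initializers deduce different element types, which is presumably the situation with buffer_offset and download_map.offset here. A minimal illustration (a and b are hypothetical stand-ins for the two offsets):

#include <array>
#include <cstdint>

int main() {
    std::uint32_t a = 1;  // hypothetical stand-ins for the two offsets
    std::uint64_t b = 2;

    // std::array deduced{a, b};  // ill-formed: elements deduce to
    //                            // different types (u32 vs u64)
    std::array<std::uint64_t, 2> offsets{a, b};  // explicit, as in the hunk
    (void)offsets;
}
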
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 6f288b3f8..6ffca2af2 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -617,7 +617,9 @@ bool Device::ShouldBoostClocks() const {
617 617
618 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F; 618 const bool is_steam_deck = vendor_id == 0x1002 && device_id == 0x163F;
619 619
620 return validated_driver && !is_steam_deck; 620 const bool is_debugging = this->HasDebuggingToolAttached();
621
622 return validated_driver && !is_steam_deck && !is_debugging;
621} 623}
622 624
623bool Device::GetSuitability(bool requires_swapchain) { 625bool Device::GetSuitability(bool requires_swapchain) {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index bb731276e..be33e4d79 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -497,7 +497,7 @@ void Config::ReadCoreValues() {
497 qt_config->beginGroup(QStringLiteral("Core")); 497 qt_config->beginGroup(QStringLiteral("Core"));
498 498
499 ReadGlobalSetting(Settings::values.use_multi_core); 499 ReadGlobalSetting(Settings::values.use_multi_core);
500 ReadGlobalSetting(Settings::values.use_extended_memory_layout); 500 ReadGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
501 501
502 qt_config->endGroup(); 502 qt_config->endGroup();
503} 503}
@@ -692,6 +692,7 @@ void Config::ReadRendererValues() {
692 qt_config->beginGroup(QStringLiteral("Renderer")); 692 qt_config->beginGroup(QStringLiteral("Renderer"));
693 693
694 ReadGlobalSetting(Settings::values.renderer_backend); 694 ReadGlobalSetting(Settings::values.renderer_backend);
695 ReadGlobalSetting(Settings::values.async_presentation);
695 ReadGlobalSetting(Settings::values.renderer_force_max_clock); 696 ReadGlobalSetting(Settings::values.renderer_force_max_clock);
696 ReadGlobalSetting(Settings::values.vulkan_device); 697 ReadGlobalSetting(Settings::values.vulkan_device);
697 ReadGlobalSetting(Settings::values.fullscreen_mode); 698 ReadGlobalSetting(Settings::values.fullscreen_mode);
@@ -1161,7 +1162,7 @@ void Config::SaveCoreValues() {
1161 qt_config->beginGroup(QStringLiteral("Core")); 1162 qt_config->beginGroup(QStringLiteral("Core"));
1162 1163
1163 WriteGlobalSetting(Settings::values.use_multi_core); 1164 WriteGlobalSetting(Settings::values.use_multi_core);
1164 WriteGlobalSetting(Settings::values.use_extended_memory_layout); 1165 WriteGlobalSetting(Settings::values.use_unsafe_extended_memory_layout);
1165 1166
1166 qt_config->endGroup(); 1167 qt_config->endGroup();
1167} 1168}
@@ -1313,6 +1314,7 @@ void Config::SaveRendererValues() {
1313 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), 1314 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
1314 static_cast<u32>(Settings::values.renderer_backend.GetDefault()), 1315 static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
1315 Settings::values.renderer_backend.UsingGlobal()); 1316 Settings::values.renderer_backend.UsingGlobal());
1317 WriteGlobalSetting(Settings::values.async_presentation);
1316 WriteGlobalSetting(Settings::values.renderer_force_max_clock); 1318 WriteGlobalSetting(Settings::values.renderer_force_max_clock);
1317 WriteGlobalSetting(Settings::values.vulkan_device); 1319 WriteGlobalSetting(Settings::values.vulkan_device);
1318 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), 1320 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 207bcdc4d..26258d744 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -35,9 +35,6 @@ void ConfigureGeneral::SetConfiguration() {
35 35
36 ui->use_multi_core->setEnabled(runtime_lock); 36 ui->use_multi_core->setEnabled(runtime_lock);
37 ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue()); 37 ui->use_multi_core->setChecked(Settings::values.use_multi_core.GetValue());
38 ui->use_extended_memory_layout->setEnabled(runtime_lock);
39 ui->use_extended_memory_layout->setChecked(
40 Settings::values.use_extended_memory_layout.GetValue());
41 38
42 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue()); 39 ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing.GetValue());
43 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue()); 40 ui->toggle_user_on_boot->setChecked(UISettings::values.select_user_on_boot.GetValue());
@@ -79,9 +76,6 @@ void ConfigureGeneral::ResetDefaults() {
79void ConfigureGeneral::ApplyConfiguration() { 76void ConfigureGeneral::ApplyConfiguration() {
80 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core, 77 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_multi_core, ui->use_multi_core,
81 use_multi_core); 78 use_multi_core);
82 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_extended_memory_layout,
83 ui->use_extended_memory_layout,
84 use_extended_memory_layout);
85 79
86 if (Settings::IsConfiguringGlobal()) { 80 if (Settings::IsConfiguringGlobal()) {
87 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked(); 81 UISettings::values.confirm_before_closing = ui->toggle_check_exit->isChecked();
@@ -141,9 +135,6 @@ void ConfigureGeneral::SetupPerGameUI() {
141 Settings::values.use_speed_limit, use_speed_limit); 135 Settings::values.use_speed_limit, use_speed_limit);
142 ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core, 136 ConfigurationShared::SetColoredTristate(ui->use_multi_core, Settings::values.use_multi_core,
143 use_multi_core); 137 use_multi_core);
144 ConfigurationShared::SetColoredTristate(ui->use_extended_memory_layout,
145 Settings::values.use_extended_memory_layout,
146 use_extended_memory_layout);
147 138
148 connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() { 139 connect(ui->toggle_speed_limit, &QCheckBox::clicked, ui->speed_limit, [this]() {
149 ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() && 140 ui->speed_limit->setEnabled(ui->toggle_speed_limit->isChecked() &&
diff --git a/src/yuzu/configuration/configure_general.h b/src/yuzu/configuration/configure_general.h
index a090c1a3f..7ff63f425 100644
--- a/src/yuzu/configuration/configure_general.h
+++ b/src/yuzu/configuration/configure_general.h
@@ -47,7 +47,6 @@ private:
47 47
48 ConfigurationShared::CheckState use_speed_limit; 48 ConfigurationShared::CheckState use_speed_limit;
49 ConfigurationShared::CheckState use_multi_core; 49 ConfigurationShared::CheckState use_multi_core;
50 ConfigurationShared::CheckState use_extended_memory_layout;
51 50
52 const Core::System& system; 51 const Core::System& system;
53}; 52};
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index add110bb0..986a1625b 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -62,13 +62,6 @@
62 </widget> 62 </widget>
63 </item> 63 </item>
64 <item> 64 <item>
65 <widget class="QCheckBox" name="use_extended_memory_layout">
66 <property name="text">
67 <string>Extended memory layout (8GB DRAM)</string>
68 </property>
69 </widget>
70 </item>
71 <item>
72 <widget class="QCheckBox" name="toggle_check_exit"> 65 <widget class="QCheckBox" name="toggle_check_exit">
73 <property name="text"> 66 <property name="text">
74 <string>Confirm exit while emulation is running</string> 67 <string>Confirm exit while emulation is running</string>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 59fb1b334..7f7bf0e4d 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -22,11 +22,13 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
22void ConfigureGraphicsAdvanced::SetConfiguration() { 22void ConfigureGraphicsAdvanced::SetConfiguration() {
23 const bool runtime_lock = !system.IsPoweredOn(); 23 const bool runtime_lock = !system.IsPoweredOn();
24 ui->use_vsync->setEnabled(runtime_lock); 24 ui->use_vsync->setEnabled(runtime_lock);
25 ui->async_present->setEnabled(runtime_lock);
25 ui->renderer_force_max_clock->setEnabled(runtime_lock); 26 ui->renderer_force_max_clock->setEnabled(runtime_lock);
26 ui->async_astc->setEnabled(runtime_lock); 27 ui->async_astc->setEnabled(runtime_lock);
27 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 28 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
28 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 29 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
29 30
31 ui->async_present->setChecked(Settings::values.async_presentation.GetValue());
30 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); 32 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
31 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); 33 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
32 ui->async_astc->setChecked(Settings::values.async_astc.GetValue()); 34 ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
@@ -54,6 +56,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
54 56
55void ConfigureGraphicsAdvanced::ApplyConfiguration() { 57void ConfigureGraphicsAdvanced::ApplyConfiguration() {
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); 58 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
59 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
60 ui->async_present, async_present);
57 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, 61 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
58 ui->renderer_force_max_clock, 62 ui->renderer_force_max_clock,
59 renderer_force_max_clock); 63 renderer_force_max_clock);
@@ -90,6 +94,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
90 // Disable if not global (only happens during game) 94 // Disable if not global (only happens during game)
91 if (Settings::IsConfiguringGlobal()) { 95 if (Settings::IsConfiguringGlobal()) {
92 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); 96 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
97 ui->async_present->setEnabled(Settings::values.async_presentation.UsingGlobal());
93 ui->renderer_force_max_clock->setEnabled( 98 ui->renderer_force_max_clock->setEnabled(
94 Settings::values.renderer_force_max_clock.UsingGlobal()); 99 Settings::values.renderer_force_max_clock.UsingGlobal());
95 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); 100 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
@@ -107,6 +112,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
107 return; 112 return;
108 } 113 }
109 114
115 ConfigurationShared::SetColoredTristate(ui->async_present, Settings::values.async_presentation,
116 async_present);
110 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, 117 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
111 Settings::values.renderer_force_max_clock, 118 Settings::values.renderer_force_max_clock,
112 renderer_force_max_clock); 119 renderer_force_max_clock);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index bf1b04749..5394ed40a 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -36,6 +36,7 @@ private:
36 36
37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; 37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
38 38
39 ConfigurationShared::CheckState async_present;
39 ConfigurationShared::CheckState renderer_force_max_clock; 40 ConfigurationShared::CheckState renderer_force_max_clock;
40 ConfigurationShared::CheckState use_vsync; 41 ConfigurationShared::CheckState use_vsync;
41 ConfigurationShared::CheckState async_astc; 42 ConfigurationShared::CheckState async_astc;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index a7dbdc18c..d7ec18939 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -7,7 +7,7 @@
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>404</width> 9 <width>404</width>
10 <height>321</height> 10 <height>376</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="windowTitle"> 13 <property name="windowTitle">
@@ -70,6 +70,13 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QCheckBox" name="async_present">
74 <property name="text">
75 <string>Enable asynchronous presentation (Vulkan only)</string>
76 </property>
77 </widget>
78 </item>
79 <item>
73 <widget class="QCheckBox" name="renderer_force_max_clock"> 80 <widget class="QCheckBox" name="renderer_force_max_clock">
74 <property name="toolTip"> 81 <property name="toolTip">
75 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> 82 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
@@ -112,7 +119,7 @@
112 <item> 119 <item>
113 <widget class="QCheckBox" name="use_fast_gpu_time"> 120 <widget class="QCheckBox" name="use_fast_gpu_time">
114 <property name="toolTip"> 121 <property name="toolTip">
115 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> 122 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
116 </property> 123 </property>
117 <property name="text"> 124 <property name="text">
118 <string>Use Fast GPU Time (Hack)</string> 125 <string>Use Fast GPU Time (Hack)</string>
@@ -122,7 +129,7 @@
122 <item> 129 <item>
123 <widget class="QCheckBox" name="use_pessimistic_flushes"> 130 <widget class="QCheckBox" name="use_pessimistic_flushes">
124 <property name="toolTip"> 131 <property name="toolTip">
125 <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string> 132 <string>Enables pessimistic buffer flushes. This option will force unmodified buffers to be flushed, which can cost performance.</string>
126 </property> 133 </property>
127 <property name="text"> 134 <property name="text">
128 <string>Use pessimistic buffer flushes (Hack)</string> 135 <string>Use pessimistic buffer flushes (Hack)</string>
@@ -132,7 +139,7 @@
132 <item> 139 <item>
133 <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> 140 <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
134 <property name="toolTip"> 141 <property name="toolTip">
135 <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> 142 <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
136 </property> 143 </property>
137 <property name="text"> 144 <property name="text">
138 <string>Use Vulkan pipeline cache</string> 145 <string>Use Vulkan pipeline cache</string>
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index 6af34f793..286ccc5cd 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -111,6 +111,9 @@ void ConfigureSystem::SetConfiguration() {
111 ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time)); 111 ui->custom_rtc_edit->setDateTime(QDateTime::fromSecsSinceEpoch(rtc_time));
112 ui->device_name_edit->setText( 112 ui->device_name_edit->setText(
113 QString::fromUtf8(Settings::values.device_name.GetValue().c_str())); 113 QString::fromUtf8(Settings::values.device_name.GetValue().c_str()));
114 ui->use_unsafe_extended_memory_layout->setEnabled(enabled);
115 ui->use_unsafe_extended_memory_layout->setChecked(
116 Settings::values.use_unsafe_extended_memory_layout.GetValue());
114 117
115 if (Settings::IsConfiguringGlobal()) { 118 if (Settings::IsConfiguringGlobal()) {
116 ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue()); 119 ui->combo_language->setCurrentIndex(Settings::values.language_index.GetValue());
@@ -160,6 +163,9 @@ void ConfigureSystem::ApplyConfiguration() {
160 ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region); 163 ConfigurationShared::ApplyPerGameSetting(&Settings::values.region_index, ui->combo_region);
161 ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index, 164 ConfigurationShared::ApplyPerGameSetting(&Settings::values.time_zone_index,
162 ui->combo_time_zone); 165 ui->combo_time_zone);
166 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_unsafe_extended_memory_layout,
167 ui->use_unsafe_extended_memory_layout,
168 use_unsafe_extended_memory_layout);
163 169
164 if (Settings::IsConfiguringGlobal()) { 170 if (Settings::IsConfiguringGlobal()) {
165 // Guard if during game and set to game-specific value 171 // Guard if during game and set to game-specific value
@@ -215,6 +221,10 @@ void ConfigureSystem::SetupPerGameUI() {
215 Settings::values.rng_seed.GetValue().has_value(), 221 Settings::values.rng_seed.GetValue().has_value(),
216 Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed); 222 Settings::values.rng_seed.GetValue(true).has_value(), use_rng_seed);
217 223
224 ConfigurationShared::SetColoredTristate(ui->use_unsafe_extended_memory_layout,
225 Settings::values.use_unsafe_extended_memory_layout,
226 use_unsafe_extended_memory_layout);
227
218 ui->custom_rtc_checkbox->setVisible(false); 228 ui->custom_rtc_checkbox->setVisible(false);
219 ui->custom_rtc_edit->setVisible(false); 229 ui->custom_rtc_edit->setVisible(false);
220} 230}
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index ec28724a1..ce1a91601 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -41,6 +41,7 @@ private:
41 bool enabled = false; 41 bool enabled = false;
42 42
43 ConfigurationShared::CheckState use_rng_seed; 43 ConfigurationShared::CheckState use_rng_seed;
44 ConfigurationShared::CheckState use_unsafe_extended_memory_layout;
44 45
45 Core::System& system; 46 Core::System& system;
46}; 47};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 9e7bc3b93..e0caecd5e 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -478,6 +478,13 @@
478 </property> 478 </property>
479 </widget> 479 </widget>
480 </item> 480 </item>
481 <item row="7" column="0">
482 <widget class="QCheckBox" name="use_unsafe_extended_memory_layout">
483 <property name="text">
484 <string>Unsafe extended memory layout (8GB DRAM)</string>
485 </property>
486 </widget>
487 </item>
481 </layout> 488 </layout>
482 </item> 489 </item>
483 </layout> 490 </layout>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index b79409a68..ba9eece1d 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -27,6 +27,7 @@
27#include "configuration/configure_input.h" 27#include "configuration/configure_input.h"
28#include "configuration/configure_per_game.h" 28#include "configuration/configure_per_game.h"
29#include "configuration/configure_tas.h" 29#include "configuration/configure_tas.h"
30#include "core/file_sys/romfs_factory.h"
30#include "core/file_sys/vfs.h" 31#include "core/file_sys/vfs.h"
31#include "core/file_sys/vfs_real.h" 32#include "core/file_sys/vfs_real.h"
32#include "core/frontend/applets/cabinet.h" 33#include "core/frontend/applets/cabinet.h"
@@ -4171,6 +4172,8 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4171 } 4172 }
4172 4173
4173 Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance(); 4174 Core::Crypto::KeyManager& keys = Core::Crypto::KeyManager::Instance();
4175 bool all_keys_present{true};
4176
4174 if (keys.BaseDeriveNecessary()) { 4177 if (keys.BaseDeriveNecessary()) {
4175 Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)}; 4178 Core::Crypto::PartitionDataManager pdm{vfs->OpenDirectory("", FileSys::Mode::Read)};
4176 4179
@@ -4195,6 +4198,7 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4195 errors += tr(" - Missing PRODINFO"); 4198 errors += tr(" - Missing PRODINFO");
4196 } 4199 }
4197 if (!errors.isEmpty()) { 4200 if (!errors.isEmpty()) {
4201 all_keys_present = false;
4198 QMessageBox::warning( 4202 QMessageBox::warning(
4199 this, tr("Derivation Components Missing"), 4203 this, tr("Derivation Components Missing"),
4200 tr("Encryption keys are missing. " 4204 tr("Encryption keys are missing. "
@@ -4222,11 +4226,40 @@ void GMainWindow::OnReinitializeKeys(ReinitializeKeyBehavior behavior) {
4222 4226
4223 system->GetFileSystemController().CreateFactories(*vfs); 4227 system->GetFileSystemController().CreateFactories(*vfs);
4224 4228
4229 if (all_keys_present && !this->CheckSystemArchiveDecryption()) {
4230 LOG_WARNING(Frontend, "Mii model decryption failed");
4231 QMessageBox::warning(
4232 this, tr("System Archive Decryption Failed"),
4233 tr("Encryption keys failed to decrypt firmware. "
4234 "<br>Please follow <a href='https://yuzu-emu.org/help/quickstart/'>the yuzu "
4235 "quickstart guide</a> to get all your keys, firmware and "
4236 "games."));
4237 }
4238
4225 if (behavior == ReinitializeKeyBehavior::Warning) { 4239 if (behavior == ReinitializeKeyBehavior::Warning) {
4226 game_list->PopulateAsync(UISettings::values.game_dirs); 4240 game_list->PopulateAsync(UISettings::values.game_dirs);
4227 } 4241 }
4228} 4242}
4229 4243
4244bool GMainWindow::CheckSystemArchiveDecryption() {
4245 constexpr u64 MiiModelId = 0x0100000000000802;
4246
4247 auto bis_system = system->GetFileSystemController().GetSystemNANDContents();
4248 if (!bis_system) {
4249 // Not having system BIS files is not an error.
4250 return true;
4251 }
4252
4253 auto mii_nca = bis_system->GetEntry(MiiModelId, FileSys::ContentRecordType::Data);
4254 if (!mii_nca) {
4255 // Not having the Mii model is not an error.
4256 return true;
4257 }
4258
4259 // Return whether we are able to decrypt the RomFS of the Mii model.
4260 return mii_nca->GetRomFS().get() != nullptr;
4261}
4262
4230std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed, 4263std::optional<u64> GMainWindow::SelectRomFSDumpTarget(const FileSys::ContentProvider& installed,
4231 u64 program_id) { 4264 u64 program_id) {
4232 const auto dlc_entries = 4265 const auto dlc_entries =
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 8b5c1d747..3bbc31ada 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -392,6 +392,7 @@ private:
392 void LoadTranslation(); 392 void LoadTranslation();
393 void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); 393 void OpenPerGameConfiguration(u64 title_id, const std::string& file_name);
394 bool CheckDarkMode(); 394 bool CheckDarkMode();
395 bool CheckSystemArchiveDecryption();
395 396
396 QString GetTasStateDescription() const; 397 QString GetTasStateDescription() const;
397 bool CreateShortcut(const std::string& shortcut_path, const std::string& title, 398 bool CreateShortcut(const std::string& shortcut_path, const std::string& title,
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 464da3231..e4f91d07c 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -274,7 +274,7 @@ void Config::ReadValues() {
274 274
275 // Core 275 // Core
276 ReadSetting("Core", Settings::values.use_multi_core); 276 ReadSetting("Core", Settings::values.use_multi_core);
277 ReadSetting("Core", Settings::values.use_extended_memory_layout); 277 ReadSetting("Core", Settings::values.use_unsafe_extended_memory_layout);
278 278
279 // Cpu 279 // Cpu
280 ReadSetting("Cpu", Settings::values.cpu_accuracy); 280 ReadSetting("Cpu", Settings::values.cpu_accuracy);
@@ -300,6 +300,7 @@ void Config::ReadValues() {
300 300
301 // Renderer 301 // Renderer
302 ReadSetting("Renderer", Settings::values.renderer_backend); 302 ReadSetting("Renderer", Settings::values.renderer_backend);
303 ReadSetting("Renderer", Settings::values.async_presentation);
303 ReadSetting("Renderer", Settings::values.renderer_force_max_clock); 304 ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
304 ReadSetting("Renderer", Settings::values.renderer_debug); 305 ReadSetting("Renderer", Settings::values.renderer_debug);
305 ReadSetting("Renderer", Settings::values.renderer_shader_feedback); 306 ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 209cfc28a..f714eae17 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -163,9 +163,9 @@ keyboard_enabled =
163# 0: Disabled, 1 (default): Enabled 163# 0: Disabled, 1 (default): Enabled
164use_multi_core = 164use_multi_core =
165 165
166# Enable extended guest system memory layout (8GB DRAM) 166# Enable unsafe extended guest system memory layout (8GB DRAM)
167# 0 (default): Disabled, 1: Enabled 167# 0 (default): Disabled, 1: Enabled
168use_extended_memory_layout = 168use_unsafe_extended_memory_layout =
169 169
170[Cpu] 170[Cpu]
171# Adjusts various optimizations. 171# Adjusts various optimizations.
@@ -264,6 +264,10 @@ cpuopt_unsafe_ignore_global_monitor =
264# 0: OpenGL, 1 (default): Vulkan 264# 0: OpenGL, 1 (default): Vulkan
265backend = 265backend =
266 266
267# Whether to enable asynchronous presentation (Vulkan only)
268# 0 (default): Off, 1: On
269async_presentation =
270
267# Enable graphics API debugging mode. 271# Enable graphics API debugging mode.
268# 0 (default): Disabled, 1: Enabled 272# 0 (default): Disabled, 1: Enabled
269debug = 273debug =